From 4dede80c5c993f78deb97b7960c74c0c26934480 Mon Sep 17 00:00:00 2001 From: Agustin Gutierrez Date: Fri, 19 Apr 2024 13:53:52 -0400 Subject: [PATCH 001/216] drm/amd/display: Fix DSC-re-computing [ Upstream commit b9b5a82c532109a09f4340ef5cabdfdbb0691a9d ] [Why] This fixes a bug introduced by commit c53655545141 ("drm/amd/display: dsc mst re-compute pbn for changes on hub"). The change caused light-up issues with a second display that required DSC on some MST docks. [How] Use Virtual DPCD for DSC caps in MST case. [Limitations] This change only affects MST DSC devices that follow specifications; additional changes are required to check for old MST DSC devices, such as ones which do not check for Virtual DPCD registers. Reviewed-by: Swapnil Patel Reviewed-by: Hersen Wu Acked-by: Tom Chung Signed-off-by: Agustin Gutierrez Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: 4641169a8c95 ("drm/amd/display: Fix incorrect DSC recompute trigger") Signed-off-by: Sasha Levin (cherry picked from commit 3f9f631f9b910c4aeafbeaee6ef08a3c193f0c29) --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9ec9792f115a8..b4bbd3be35a6e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1219,10 +1219,6 @@ static bool is_dsc_need_re_compute( if (dc_link->type != dc_connection_mst_branch) return false; - if (!(dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT || - dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT)) - return false; - for (i = 0; i < MAX_PIPES; i++) stream_on_link[i] = NULL; @@ -1240,7 +1236,19 @@ static bool is_dsc_need_re_compute( continue; aconnector = (struct amdgpu_dm_connector *) 
stream->dm_stream_context; - if (!aconnector) + if (!aconnector || !aconnector->dsc_aux) + continue; + + /* + * Check if cached virtual MST DSC caps are available and DSC is supported + * this change takes care of newer MST DSC capable devices that report their + * DPCD caps as per specifications in their Virtual DPCD registers. + + * TODO: implement the check for older MST DSC devices that do not conform to + * specifications. + */ + if (!(aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported || + aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT)) continue; stream_on_link[new_stream_on_link_num] = aconnector; From 8bbcc58aac219a781abd4d6a008a715d5e234742 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 4 Sep 2024 16:56:45 -0400 Subject: [PATCH 002/216] drm/amd/display: Fix incorrect DSC recompute trigger [ Upstream commit 4641169a8c95d9efc35d2d3c55c3948f3b375ff9 ] A stream without dsc_aux should not be eliminated from the dsc determination. Whether it needs a dsc recompute depends on whether its mode has changed or not. Eliminating such a no-dsc stream from the dsc determination policy will end up with inconsistencies in the new dc_state when compared to the current dc_state, triggering a dsc recompute that should not have happened. 
Reviewed-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin (cherry picked from commit e8b8c1ecbd2cce6b0e78c0884c0705668297f84e) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index b4bbd3be35a6e..385a5a75fdf87 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1236,7 +1236,7 @@ static bool is_dsc_need_re_compute( continue; aconnector = (struct amdgpu_dm_connector *) stream->dm_stream_context; - if (!aconnector || !aconnector->dsc_aux) + if (!aconnector) continue; /* From 4d78dd3897e41341b02445b0cad7a1cab1c6f0e3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 18 Nov 2024 07:05:54 +0100 Subject: [PATCH 003/216] docs: media: update location of the media patches [ Upstream commit 72ad4ff638047bbbdf3232178fea4bec1f429319 ] Due to recent changes in the way we're maintaining media, the location of the main tree was updated. Change docs accordingly. 
Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Hans Verkuil Signed-off-by: Sasha Levin (cherry picked from commit bffaf4cb28102ded8a78ce0f708cda3ead9046c8) --- Documentation/admin-guide/media/building.rst | 2 +- Documentation/admin-guide/media/saa7134.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/media/building.rst b/Documentation/admin-guide/media/building.rst index a064734299163..7a413ba07f93b 100644 --- a/Documentation/admin-guide/media/building.rst +++ b/Documentation/admin-guide/media/building.rst @@ -15,7 +15,7 @@ Please notice, however, that, if: you should use the main media development tree ``master`` branch: - https://git.linuxtv.org/media_tree.git/ + https://git.linuxtv.org/media.git/ In this case, you may find some useful information at the `LinuxTv wiki pages `_: diff --git a/Documentation/admin-guide/media/saa7134.rst b/Documentation/admin-guide/media/saa7134.rst index 51eae7eb5ab7f..18d7cbc897db4 100644 --- a/Documentation/admin-guide/media/saa7134.rst +++ b/Documentation/admin-guide/media/saa7134.rst @@ -67,7 +67,7 @@ Changes / Fixes Please mail to linux-media AT vger.kernel.org unified diffs against the linux media git tree: - https://git.linuxtv.org/media_tree.git/ + https://git.linuxtv.org/media.git/ This is done by committing a patch at a clone of the git tree and submitting the patch using ``git send-email``. Don't forget to From 341c678ed8df1255a7fb2a38403320f3fe559daf Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 19 Nov 2024 12:21:32 +0100 Subject: [PATCH 004/216] x86/mm: Carve out INVLPG inline asm for use by others [ Upstream commit f1d84b59cbb9547c243d93991acf187fdbe9fbe9 ] No functional changes. 
Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/ZyulbYuvrkshfsd2@antipodes Signed-off-by: Sasha Levin (cherry picked from commit 8322a66f9369285a68002d2c761b4d38945011b5) --- arch/x86/include/asm/tlb.h | 4 ++++ arch/x86/mm/tlb.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 580636cdc257b..4d3c9d00d6b6b 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -34,4 +34,8 @@ static inline void __tlb_remove_table(void *table) free_page_and_swap_cache(table); } +static inline void invlpg(unsigned long addr) +{ + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); +} #endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 2fbae48f0b470..64f594826a282 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -1145,7 +1146,7 @@ STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr) bool cpu_pcide; /* Flush 'addr' from the kernel PCID: */ - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + invlpg(addr); /* If PTI is off there is no user PCID and nothing to flush. */ if (!static_cpu_has(X86_FEATURE_PTI)) From 24cae07559aa744c61eb4fe3d050e8561cda34f6 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:55 +0000 Subject: [PATCH 005/216] smb/client: rename cifs_ntsd to smb_ntsd [ Upstream commit 3651487607ae778df1051a0a38bb34a5bd34e3b7 ] Preparation for moving acl definitions to new common header file. 
Use the following shell command to rename: find fs/smb/client -type f -exec sed -i \ 's/struct cifs_ntsd/struct smb_ntsd/g' {} + Signed-off-by: ChenXiaoSong Reviewed-by: Namjae Jeon Signed-off-by: Steve French Stable-dep-of: d413eabff18d ("fs/smb/client: implement chmod() for SMB3 POSIX Extensions") Signed-off-by: Sasha Levin (cherry picked from commit 386660bd303ec1f7b73a306f3a8ddf802a56b9b6) --- fs/smb/client/cifsacl.c | 36 ++++++++++++++++++------------------ fs/smb/client/cifsacl.h | 6 +++--- fs/smb/client/cifsglob.h | 12 ++++++------ fs/smb/client/cifsproto.h | 16 ++++++++-------- fs/smb/client/cifssmb.c | 6 +++--- fs/smb/client/smb2ops.c | 14 +++++++------- fs/smb/client/smb2pdu.c | 2 +- fs/smb/client/smb2proto.h | 2 +- fs/smb/client/xattr.c | 4 ++-- 9 files changed, 49 insertions(+), 49 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index f5b6df82e8570..3f7657475cd93 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -515,8 +515,8 @@ exit_cifs_idmap(void) } /* copy ntsd, owner sid, and group sid from a security descriptor to another */ -static __u32 copy_sec_desc(const struct cifs_ntsd *pntsd, - struct cifs_ntsd *pnntsd, +static __u32 copy_sec_desc(const struct smb_ntsd *pntsd, + struct smb_ntsd *pnntsd, __u32 sidsoffset, struct cifs_sid *pownersid, struct cifs_sid *pgrpsid) @@ -527,7 +527,7 @@ static __u32 copy_sec_desc(const struct cifs_ntsd *pntsd, /* copy security descriptor control portion */ pnntsd->revision = pntsd->revision; pnntsd->type = pntsd->type; - pnntsd->dacloffset = cpu_to_le32(sizeof(struct cifs_ntsd)); + pnntsd->dacloffset = cpu_to_le32(sizeof(struct smb_ntsd)); pnntsd->sacloffset = 0; pnntsd->osidoffset = cpu_to_le32(sidsoffset); pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct cifs_sid)); @@ -1191,7 +1191,7 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl) /* Convert CIFS ACL to POSIX form */ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, - struct 
cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr, + struct smb_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr, bool get_mode_from_special_sid) { int rc = 0; @@ -1249,7 +1249,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, } /* Convert permission bits from mode to equivalent CIFS ACL */ -static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, +static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, __u32 secdesclen, __u32 *pnsecdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid, bool mode_from_sid, bool id_from_sid, int *aclflag) { @@ -1279,7 +1279,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, le32_to_cpu(pntsd->gsidoffset)); if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ - ndacloffset = sizeof(struct cifs_ntsd); + ndacloffset = sizeof(struct smb_ntsd); ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset); ndacl_ptr->revision = dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION); @@ -1297,7 +1297,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, *aclflag |= CIFS_ACL_DACL; } else { - ndacloffset = sizeof(struct cifs_ntsd); + ndacloffset = sizeof(struct smb_ntsd); ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset); ndacl_ptr->revision = dacloffset ? 
dacl_ptr->revision : cpu_to_le16(ACL_REVISION); @@ -1385,11 +1385,11 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, } #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY -struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, +struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, const struct cifs_fid *cifsfid, u32 *pacllen, u32 __maybe_unused unused) { - struct cifs_ntsd *pntsd = NULL; + struct smb_ntsd *pntsd = NULL; unsigned int xid; int rc; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1410,10 +1410,10 @@ struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, return pntsd; } -static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, +static struct smb_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, u32 *pacllen) { - struct cifs_ntsd *pntsd = NULL; + struct smb_ntsd *pntsd = NULL; int oplock = 0; unsigned int xid; int rc; @@ -1454,11 +1454,11 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, } /* Retrieve an ACL from the server */ -struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, +struct smb_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, struct inode *inode, const char *path, u32 *pacllen, u32 info) { - struct cifs_ntsd *pntsd = NULL; + struct smb_ntsd *pntsd = NULL; struct cifsFileInfo *open_file = NULL; if (inode) @@ -1472,7 +1472,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, } /* Set an ACL on the server */ -int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, +int set_cifs_acl(struct smb_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path, int aclflag) { int oplock = 0; @@ -1528,7 +1528,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, bool mode_from_special_sid, const char *path, const struct cifs_fid *pfid) { - struct cifs_ntsd *pntsd = NULL; + struct smb_ntsd *pntsd = NULL; u32 acllen = 0; int rc = 0; struct 
tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1581,8 +1581,8 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, __u32 nsecdesclen = 0; __u32 dacloffset = 0; struct cifs_acl *dacl_ptr = NULL; - struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ - struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ + struct smb_ntsd *pntsd = NULL; /* acl obtained from server */ + struct smb_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct smb_version_operations *ops; @@ -1630,7 +1630,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen += 5 * sizeof(struct cifs_ace); } else { /* chown */ /* When ownership changes, changes new owner sid length could be different */ - nsecdesclen = sizeof(struct cifs_ntsd) + (sizeof(struct cifs_sid) * 2); + nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct cifs_sid) * 2); dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); diff --git a/fs/smb/client/cifsacl.h b/fs/smb/client/cifsacl.h index ccbfc754bd3c7..1516545d7f67e 100644 --- a/fs/smb/client/cifsacl.h +++ b/fs/smb/client/cifsacl.h @@ -33,7 +33,7 @@ * Security Descriptor length containing DACL with 3 ACEs (one each for * owner, group and world). 
*/ -#define DEFAULT_SEC_DESC_LEN (sizeof(struct cifs_ntsd) + \ +#define DEFAULT_SEC_DESC_LEN (sizeof(struct smb_ntsd) + \ sizeof(struct cifs_acl) + \ (sizeof(struct cifs_ace) * 4)) @@ -55,7 +55,7 @@ #define SID_STRING_BASE_SIZE (2 + 3 + 15 + 1) #define SID_STRING_SUBAUTH_SIZE (11) /* size of a single subauth string */ -struct cifs_ntsd { +struct smb_ntsd { __le16 revision; /* revision level */ __le16 type; __le32 osidoffset; @@ -194,6 +194,6 @@ struct owner_group_sids { * Minimum security descriptor can be one without any SACL and DACL and can * consist of revision, type, and two sids of minimum size for owner and group */ -#define MIN_SEC_DESC_LEN (sizeof(struct cifs_ntsd) + (2 * MIN_SID_LEN)) +#define MIN_SEC_DESC_LEN (sizeof(struct smb_ntsd) + (2 * MIN_SID_LEN)) #endif /* _CIFSACL_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 6b57b167a49d8..cf22629bf90b5 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -539,12 +539,12 @@ struct smb_version_operations { int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, const char *, const void *, const __u16, const struct nls_table *, struct cifs_sb_info *); - struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *, - const char *, u32 *, u32); - struct cifs_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *, - const struct cifs_fid *, u32 *, u32); - int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, - int); + struct smb_ntsd * (*get_acl)(struct cifs_sb_info *cifssb, struct inode *ino, + const char *patch, u32 *plen, u32 info); + struct smb_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *cifssmb, + const struct cifs_fid *pfid, u32 *plen, u32 info); + int (*set_acl)(struct smb_ntsd *pntsd, __u32 len, struct inode *ino, const char *path, + int flag); /* writepages retry size */ unsigned int (*wp_retry_size)(struct inode *); /* get mtu credits */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 
83692bf60007a..f34c533efe49b 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -231,16 +231,16 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, const char *path, const struct cifs_fid *pfid); extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, kuid_t uid, kgid_t gid); -extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, - const char *, u32 *, u32); -extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *, - const struct cifs_fid *, u32 *, u32); +extern struct smb_ntsd *get_cifs_acl(struct cifs_sb_info *cifssmb, struct inode *ino, + const char *path, u32 *plen, u32 info); +extern struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifssb, + const struct cifs_fid *pfid, u32 *plen, u32 info); extern struct posix_acl *cifs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type); extern int cifs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); -extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, - const char *, int); +extern int set_cifs_acl(struct smb_ntsd *pntsd, __u32 len, struct inode *ino, + const char *path, int flag); extern unsigned int setup_authusers_ACE(struct cifs_ace *pace); extern unsigned int setup_special_mode_ACE(struct cifs_ace *pace, __u64 nmode); extern unsigned int setup_special_user_owner_ACE(struct cifs_ace *pace); @@ -568,9 +568,9 @@ extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nls_codepage, struct cifs_sb_info *cifs_sb); extern int CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, - __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); + __u16 fid, struct smb_ntsd **acl_inf, __u32 *buflen); extern int CIFSSMBSetCIFSACL(const unsigned int, struct cifs_tcon *, __u16, - struct cifs_ntsd *, __u32, int); + struct smb_ntsd *pntsd, __u32 len, int aclflag); extern int cifs_do_get_acl(const unsigned int xid, 
struct cifs_tcon *tcon, const unsigned char *searchName, struct posix_acl **acl, const int acl_type, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index a34db419e46f7..2f8745736dbb0 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -3385,7 +3385,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata, /* Get Security Descriptor (by handle) from remote server for a file or dir */ int CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, - struct cifs_ntsd **acl_inf, __u32 *pbuflen) + struct smb_ntsd **acl_inf, __u32 *pbuflen) { int rc = 0; int buf_type = 0; @@ -3455,7 +3455,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, /* check if buffer is big enough for the acl header followed by the smallest SID */ - if ((*pbuflen < sizeof(struct cifs_ntsd) + 8) || + if ((*pbuflen < sizeof(struct smb_ntsd) + 8) || (*pbuflen >= 64 * 1024)) { cifs_dbg(VFS, "bad acl length %d\n", *pbuflen); rc = -EINVAL; @@ -3475,7 +3475,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, int CIFSSMBSetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, - struct cifs_ntsd *pntsd, __u32 acllen, int aclflag) + struct smb_ntsd *pntsd, __u32 acllen, int aclflag) { __u16 byte_count, param_count, data_count, param_offset, data_offset; int rc = 0; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 6645f147d57c2..fc6d00344c50e 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3001,11 +3001,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, return rc; } -static struct cifs_ntsd * +static struct smb_ntsd * get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, const struct cifs_fid *cifsfid, u32 *pacllen, u32 info) { - struct cifs_ntsd *pntsd = NULL; + struct smb_ntsd *pntsd = NULL; unsigned int xid; int rc = -EOPNOTSUPP; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -3030,11 +3030,11 @@ 
get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, } -static struct cifs_ntsd * +static struct smb_ntsd * get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, u32 *pacllen, u32 info) { - struct cifs_ntsd *pntsd = NULL; + struct smb_ntsd *pntsd = NULL; u8 oplock = SMB2_OPLOCK_LEVEL_NONE; unsigned int xid; int rc; @@ -3097,7 +3097,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, } static int -set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, +set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path, int aclflag) { u8 oplock = SMB2_OPLOCK_LEVEL_NONE; @@ -3155,12 +3155,12 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, } /* Retrieve an ACL from the server */ -static struct cifs_ntsd * +static struct smb_ntsd * get_smb2_acl(struct cifs_sb_info *cifs_sb, struct inode *inode, const char *path, u32 *pacllen, u32 info) { - struct cifs_ntsd *pntsd = NULL; + struct smb_ntsd *pntsd = NULL; struct cifsFileInfo *open_file = NULL; if (inode && !(info & SACL_SECINFO)) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 38b26468eb0c5..0a4985bba55ff 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5626,7 +5626,7 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, - struct cifs_ntsd *pnntsd, int pacllen, int aclflag) + struct smb_ntsd *pnntsd, int pacllen, int aclflag) { return send_set_info(xid, tcon, persistent_fid, volatile_fid, current->tgid, 0, SMB2_O_INFO_SECURITY, aclflag, diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 613667b46c580..fd90d8e5a1d16 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -247,7 +247,7 @@ extern int SMB2_set_info_init(struct cifs_tcon *tcon, extern void SMB2_set_info_free(struct smb_rqst *rqst); extern int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon, 
u64 persistent_fid, u64 volatile_fid, - struct cifs_ntsd *pnntsd, int pacllen, int aclflag); + struct smb_ntsd *pnntsd, int pacllen, int aclflag); extern int SMB2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct smb2_file_full_ea_info *buf, int len); diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index c2bf829310bee..e8696ad4da994 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -162,7 +162,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, case XATTR_CIFS_ACL: case XATTR_CIFS_NTSD: case XATTR_CIFS_NTSD_FULL: { - struct cifs_ntsd *pacl; + struct smb_ntsd *pacl; if (!value) goto out; @@ -315,7 +315,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, * fetch owner and DACL otherwise */ u32 acllen, extra_info; - struct cifs_ntsd *pacl; + struct smb_ntsd *pacl; if (pTcon->ses->server->ops->get_acl == NULL) goto out; /* rc already EOPNOTSUPP */ From fc52f6831a1b4a07370336b6495f559f8a968328 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:56 +0000 Subject: [PATCH 006/216] smb/client: rename cifs_sid to smb_sid [ Upstream commit 7f599d8fb3e087aff5be4e1392baaae3f8d42419 ] Preparation for moving acl definitions to new common header file. 
Use the following shell command to rename: find fs/smb/client -type f -exec sed -i \ 's/struct cifs_sid/struct smb_sid/g' {} + Signed-off-by: ChenXiaoSong Reviewed-by: Namjae Jeon Signed-off-by: Steve French Stable-dep-of: d413eabff18d ("fs/smb/client: implement chmod() for SMB3 POSIX Extensions") Signed-off-by: Sasha Levin (cherry picked from commit 46c22d37f691985a790a42be4a1bc619047c6de4) --- fs/smb/client/cifsacl.c | 96 +++++++++++++++++++-------------------- fs/smb/client/cifsacl.h | 6 +-- fs/smb/client/cifsglob.h | 8 ++-- fs/smb/client/cifsproto.h | 2 +- fs/smb/client/smb2inode.c | 4 +- fs/smb/client/smb2pdu.c | 2 +- fs/smb/client/smb2pdu.h | 8 ++-- 7 files changed, 63 insertions(+), 63 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 3f7657475cd93..dd399f9a74242 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -27,18 +27,18 @@ #include "cifs_unicode.h" /* security id for everyone/world system group */ -static const struct cifs_sid sid_everyone = { +static const struct smb_sid sid_everyone = { 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; /* security id for Authenticated Users system group */ -static const struct cifs_sid sid_authusers = { +static const struct smb_sid sid_authusers = { 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} }; /* S-1-22-1 Unmapped Unix users */ -static const struct cifs_sid sid_unix_users = {1, 1, {0, 0, 0, 0, 0, 22}, +static const struct smb_sid sid_unix_users = {1, 1, {0, 0, 0, 0, 0, 22}, {cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; /* S-1-22-2 Unmapped Unix groups */ -static const struct cifs_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22}, +static const struct smb_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22}, {cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; /* @@ -48,17 +48,17 @@ static const struct cifs_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22}, /* S-1-5-88 MS NFS and Apple style UID/GID/mode */ /* S-1-5-88-1 Unix uid */ -static const struct 
cifs_sid sid_unix_NFS_users = { 1, 2, {0, 0, 0, 0, 0, 5}, +static const struct smb_sid sid_unix_NFS_users = { 1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(88), cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; /* S-1-5-88-2 Unix gid */ -static const struct cifs_sid sid_unix_NFS_groups = { 1, 2, {0, 0, 0, 0, 0, 5}, +static const struct smb_sid sid_unix_NFS_groups = { 1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(88), cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; /* S-1-5-88-3 Unix mode */ -static const struct cifs_sid sid_unix_NFS_mode = { 1, 2, {0, 0, 0, 0, 0, 5}, +static const struct smb_sid sid_unix_NFS_mode = { 1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(88), cpu_to_le32(3), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; @@ -106,7 +106,7 @@ static struct key_type cifs_idmap_key_type = { }; static char * -sid_to_key_str(struct cifs_sid *sidptr, unsigned int type) +sid_to_key_str(struct smb_sid *sidptr, unsigned int type) { int i, len; unsigned int saval; @@ -158,7 +158,7 @@ sid_to_key_str(struct cifs_sid *sidptr, unsigned int type) * the same returns zero, if they do not match returns non-zero. 
*/ static int -compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) +compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid) { int i; int num_subauth, num_sat, num_saw; @@ -204,11 +204,11 @@ compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) } static bool -is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group) +is_well_known_sid(const struct smb_sid *psid, uint32_t *puid, bool is_group) { int i; int num_subauth; - const struct cifs_sid *pwell_known_sid; + const struct smb_sid *pwell_known_sid; if (!psid || (puid == NULL)) return false; @@ -260,7 +260,7 @@ is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group) } static __u16 -cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) +cifs_copy_sid(struct smb_sid *dst, const struct smb_sid *src) { int i; __u16 size = 1 + 1 + 6; @@ -277,11 +277,11 @@ cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) } static int -id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) +id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid) { int rc; struct key *sidkey; - struct cifs_sid *ksid; + struct smb_sid *ksid; unsigned int ksid_size; char desc[3 + 10 + 1]; /* 3 byte prefix + 10 bytes for value + NULL */ const struct cred *saved_cred; @@ -312,8 +312,8 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) * it could be. */ ksid = sidkey->datalen <= sizeof(sidkey->payload) ? 
- (struct cifs_sid *)&sidkey->payload : - (struct cifs_sid *)sidkey->payload.data[0]; + (struct smb_sid *)&sidkey->payload : + (struct smb_sid *)sidkey->payload.data[0]; ksid_size = CIFS_SID_BASE_SIZE + (ksid->num_subauth * sizeof(__le32)); if (ksid_size > sidkey->datalen) { @@ -336,7 +336,7 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) } int -sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, +sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, struct cifs_fattr *fattr, uint sidtype) { int rc = 0; @@ -518,11 +518,11 @@ exit_cifs_idmap(void) static __u32 copy_sec_desc(const struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, __u32 sidsoffset, - struct cifs_sid *pownersid, - struct cifs_sid *pgrpsid) + struct smb_sid *pownersid, + struct smb_sid *pgrpsid) { - struct cifs_sid *owner_sid_ptr, *group_sid_ptr; - struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr; + struct smb_sid *owner_sid_ptr, *group_sid_ptr; + struct smb_sid *nowner_sid_ptr, *ngroup_sid_ptr; /* copy security descriptor control portion */ pnntsd->revision = pntsd->revision; @@ -530,28 +530,28 @@ static __u32 copy_sec_desc(const struct smb_ntsd *pntsd, pnntsd->dacloffset = cpu_to_le32(sizeof(struct smb_ntsd)); pnntsd->sacloffset = 0; pnntsd->osidoffset = cpu_to_le32(sidsoffset); - pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct cifs_sid)); + pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct smb_sid)); /* copy owner sid */ if (pownersid) owner_sid_ptr = pownersid; else - owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + + owner_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); - nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset); + nowner_sid_ptr = (struct smb_sid *)((char *)pnntsd + sidsoffset); cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr); /* copy group sid */ if (pgrpsid) group_sid_ptr = pgrpsid; else - group_sid_ptr = (struct cifs_sid *)((char *)pntsd + + group_sid_ptr = (struct smb_sid *)((char 
*)pntsd + le32_to_cpu(pntsd->gsidoffset)); - ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset + - sizeof(struct cifs_sid)); + ngroup_sid_ptr = (struct smb_sid *)((char *)pnntsd + sidsoffset + + sizeof(struct smb_sid)); cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr); - return sidsoffset + (2 * sizeof(struct cifs_sid)); + return sidsoffset + (2 * sizeof(struct smb_sid)); } @@ -666,7 +666,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, return; } -static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct cifs_sid *psid) +static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct smb_sid *psid) { __u16 size = 1 + 1 + 2 + 4; @@ -686,7 +686,7 @@ static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct ci } static __u16 fill_ace_for_sid(struct cifs_ace *pntace, - const struct cifs_sid *psid, __u64 nmode, + const struct smb_sid *psid, __u64 nmode, umode_t bits, __u8 access_type, bool allow_delete_child) { @@ -759,7 +759,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl) #endif static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, - struct cifs_sid *pownersid, struct cifs_sid *pgrpsid, + struct smb_sid *pownersid, struct smb_sid *pgrpsid, struct cifs_fattr *fattr, bool mode_from_special_sid) { int i; @@ -930,8 +930,8 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace) } static void populate_new_aces(char *nacl_base, - struct cifs_sid *pownersid, - struct cifs_sid *pgrpsid, + struct smb_sid *pownersid, + struct smb_sid *pgrpsid, __u64 *pnmode, u32 *pnum_aces, u16 *pnsize, bool modefromsid) { @@ -967,7 +967,7 @@ static void populate_new_aces(char *nacl_base, * updated in the inode. */ - if (!memcmp(pownersid, pgrpsid, sizeof(struct cifs_sid))) { + if (!memcmp(pownersid, pgrpsid, sizeof(struct smb_sid))) { /* * Case when owner and group SIDs are the same. * Set the more restrictive of the two modes. 
@@ -1035,8 +1035,8 @@ static void populate_new_aces(char *nacl_base, } static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl *pndacl, - struct cifs_sid *pownersid, struct cifs_sid *pgrpsid, - struct cifs_sid *pnownersid, struct cifs_sid *pngrpsid) + struct smb_sid *pownersid, struct smb_sid *pgrpsid, + struct smb_sid *pnownersid, struct smb_sid *pngrpsid) { int i; u16 size = 0; @@ -1075,7 +1075,7 @@ static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl } static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, - struct cifs_sid *pownersid, struct cifs_sid *pgrpsid, + struct smb_sid *pownersid, struct smb_sid *pgrpsid, __u64 *pnmode, bool mode_from_sid) { int i; @@ -1156,7 +1156,7 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, return 0; } -static int parse_sid(struct cifs_sid *psid, char *end_of_acl) +static int parse_sid(struct smb_sid *psid, char *end_of_acl) { /* BB need to add parm so we can store the SID BB */ @@ -1195,7 +1195,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, bool get_mode_from_special_sid) { int rc = 0; - struct cifs_sid *owner_sid_ptr, *group_sid_ptr; + struct smb_sid *owner_sid_ptr, *group_sid_ptr; struct cifs_acl *dacl_ptr; /* no need for SACL ptr */ char *end_of_acl = ((char *)pntsd) + acl_len; __u32 dacloffset; @@ -1203,9 +1203,9 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, if (pntsd == NULL) return -EIO; - owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + + owner_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); - group_sid_ptr = (struct cifs_sid *)((char *)pntsd + + group_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); dacloffset = le32_to_cpu(pntsd->dacloffset); dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); @@ -1257,8 +1257,8 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, __u32 dacloffset; __u32 ndacloffset; 
__u32 sidsoffset; - struct cifs_sid *owner_sid_ptr, *group_sid_ptr; - struct cifs_sid *nowner_sid_ptr = NULL, *ngroup_sid_ptr = NULL; + struct smb_sid *owner_sid_ptr, *group_sid_ptr; + struct smb_sid *nowner_sid_ptr = NULL, *ngroup_sid_ptr = NULL; struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */ struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ char *end_of_acl = ((char *)pntsd) + secdesclen; @@ -1273,9 +1273,9 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, } } - owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + + owner_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); - group_sid_ptr = (struct cifs_sid *)((char *)pntsd + + group_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ @@ -1305,7 +1305,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, if (uid_valid(uid)) { /* chown */ uid_t id; - nowner_sid_ptr = kzalloc(sizeof(struct cifs_sid), + nowner_sid_ptr = kzalloc(sizeof(struct smb_sid), GFP_KERNEL); if (!nowner_sid_ptr) { rc = -ENOMEM; @@ -1334,7 +1334,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, } if (gid_valid(gid)) { /* chgrp */ gid_t id; - ngroup_sid_ptr = kzalloc(sizeof(struct cifs_sid), + ngroup_sid_ptr = kzalloc(sizeof(struct smb_sid), GFP_KERNEL); if (!ngroup_sid_ptr) { rc = -ENOMEM; @@ -1630,7 +1630,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen += 5 * sizeof(struct cifs_ace); } else { /* chown */ /* When ownership changes, changes new owner sid length could be different */ - nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct cifs_sid) * 2); + nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct smb_sid) * 2); dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); diff --git a/fs/smb/client/cifsacl.h 
b/fs/smb/client/cifsacl.h index 1516545d7f67e..6a38718220fcb 100644 --- a/fs/smb/client/cifsacl.h +++ b/fs/smb/client/cifsacl.h @@ -64,14 +64,14 @@ struct smb_ntsd { __le32 dacloffset; } __attribute__((packed)); -struct cifs_sid { +struct smb_sid { __u8 revision; /* revision level */ __u8 num_subauth; __u8 authority[NUM_AUTHS]; __le32 sub_auth[SID_MAX_SUB_AUTHORITIES]; /* sub_auth[num_subauth] */ } __attribute__((packed)); -/* size of a struct cifs_sid, sans sub_auth array */ +/* size of a struct smb_sid, sans sub_auth array */ #define CIFS_SID_BASE_SIZE (1 + 1 + NUM_AUTHS) struct cifs_acl { @@ -116,7 +116,7 @@ struct cifs_ace { __u8 flags; __le16 size; __le32 access_req; - struct cifs_sid sid; /* ie UUID of user or group who gets these perms */ + struct smb_sid sid; /* ie UUID of user or group who gets these perms */ } __attribute__((packed)); /* diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index cf22629bf90b5..69d850b6b37fa 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -202,8 +202,8 @@ struct cifs_cred { int gid; int mode; int cecount; - struct cifs_sid osid; - struct cifs_sid gsid; + struct smb_sid osid; + struct smb_sid gsid; struct cifs_ntace *ntaces; struct cifs_ace *aces; }; @@ -231,8 +231,8 @@ struct cifs_open_info_data { unsigned int eas_len; } wsl; char *symlink_target; - struct cifs_sid posix_owner; - struct cifs_sid posix_group; + struct smb_sid posix_owner; + struct smb_sid posix_group; union { struct smb2_file_all_info fi; struct smb311_posix_qinfo posix_fi; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index f34c533efe49b..059e506ccf5b3 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -223,7 +223,7 @@ extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs, extern int cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, const unsigned int xid); -extern int sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, +extern 
int sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, struct cifs_fattr *fattr, uint sidtype); extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 2a292736c89a2..e695df1dbb23b 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -315,7 +315,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, SMB2_O_INFO_FILE, 0, sizeof(struct smb311_posix_qinfo *) + (PATH_MAX * 2) + - (sizeof(struct cifs_sid) * 2), 0, NULL); + (sizeof(struct smb_sid) * 2), 0, NULL); } else { rc = SMB2_query_info_init(tcon, server, &rqst[num_rqst], @@ -325,7 +325,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, SMB2_O_INFO_FILE, 0, sizeof(struct smb311_posix_qinfo *) + (PATH_MAX * 2) + - (sizeof(struct cifs_sid) * 2), 0, NULL); + (sizeof(struct smb_sid) * 2), 0, NULL); } if (!rc && (!cfile || num_rqst > 1)) { smb2_set_next_command(tcon, &rqst[num_rqst]); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 0a4985bba55ff..42f950ae10fbd 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3915,7 +3915,7 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct smb311_posix_qinfo *data, u32 *plen) { size_t output_len = sizeof(struct smb311_posix_qinfo *) + - (sizeof(struct cifs_sid) * 2) + (PATH_MAX * 2); + (sizeof(struct smb_sid) * 2) + (PATH_MAX * 2); *plen = 0; return query_info(xid, tcon, persistent_fid, volatile_fid, diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 5c458ab3b05a4..076d9e83e1a04 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -364,8 +364,8 @@ struct create_posix_rsp { u32 nlink; u32 reparse_tag; u32 mode; - struct cifs_sid owner; /* var-sized on the wire */ - struct cifs_sid group; /* var-sized on the wire */ + struct smb_sid 
owner; /* var-sized on the wire */ + struct smb_sid group; /* var-sized on the wire */ } __packed; #define SMB2_QUERY_DIRECTORY_IOV_SIZE 2 @@ -408,8 +408,8 @@ struct smb2_posix_info { struct smb2_posix_info_parsed { const struct smb2_posix_info *base; size_t size; - struct cifs_sid owner; - struct cifs_sid group; + struct smb_sid owner; + struct smb_sid group; int name_len; const u8 *name; }; From 67687c7e68b470075f200a0877395614652f6711 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:57 +0000 Subject: [PATCH 007/216] smb/client: rename cifs_acl to smb_acl [ Upstream commit 251b93ae73805b216e84ed2190b525f319da4c87 ] Preparation for moving acl definitions to new common header file. Use the following shell command to rename: find fs/smb/client -type f -exec sed -i \ 's/struct cifs_acl/struct smb_acl/g' {} + Signed-off-by: ChenXiaoSong Reviewed-by: Namjae Jeon Signed-off-by: Steve French Stable-dep-of: d413eabff18d ("fs/smb/client: implement chmod() for SMB3 POSIX Extensions") Signed-off-by: Sasha Levin (cherry picked from commit 298e73ac323a2bde8a2efe77b44fae235126a633) --- fs/smb/client/cifsacl.c | 34 +++++++++++++++++----------------- fs/smb/client/cifsacl.h | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index dd399f9a74242..2e1c9b528dde6 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -758,7 +758,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl) } #endif -static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, +static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, struct smb_sid *pownersid, struct smb_sid *pgrpsid, struct cifs_fattr *fattr, bool mode_from_special_sid) { @@ -793,7 +793,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, fattr->cf_mode &= ~(0777); acl_base = (char *)pdacl; - acl_size = sizeof(struct cifs_acl); + acl_size = sizeof(struct smb_acl); num_aces = 
le32_to_cpu(pdacl->num_aces); if (num_aces > 0) { @@ -1034,7 +1034,7 @@ static void populate_new_aces(char *nacl_base, *pnsize = nsize; } -static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl *pndacl, +static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *pndacl, struct smb_sid *pownersid, struct smb_sid *pgrpsid, struct smb_sid *pnownersid, struct smb_sid *pngrpsid) { @@ -1049,11 +1049,11 @@ static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl u16 ace_size = 0; acl_base = (char *)pdacl; - size = sizeof(struct cifs_acl); + size = sizeof(struct smb_acl); src_num_aces = le32_to_cpu(pdacl->num_aces); nacl_base = (char *)pndacl; - nsize = sizeof(struct cifs_acl); + nsize = sizeof(struct smb_acl); /* Go through all the ACEs */ for (i = 0; i < src_num_aces; ++i) { @@ -1074,7 +1074,7 @@ static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl return nsize; } -static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, +static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, struct smb_sid *pownersid, struct smb_sid *pgrpsid, __u64 *pnmode, bool mode_from_sid) { @@ -1091,7 +1091,7 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, /* Assuming that pndacl and pnmode are never NULL */ nacl_base = (char *)pndacl; - nsize = sizeof(struct cifs_acl); + nsize = sizeof(struct smb_acl); /* If pdacl is NULL, we don't have a src. Simply populate new ACL. 
*/ if (!pdacl) { @@ -1103,7 +1103,7 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, } acl_base = (char *)pdacl; - size = sizeof(struct cifs_acl); + size = sizeof(struct smb_acl); src_num_aces = le32_to_cpu(pdacl->num_aces); /* Retain old ACEs which we can retain */ @@ -1196,7 +1196,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, { int rc = 0; struct smb_sid *owner_sid_ptr, *group_sid_ptr; - struct cifs_acl *dacl_ptr; /* no need for SACL ptr */ + struct smb_acl *dacl_ptr; /* no need for SACL ptr */ char *end_of_acl = ((char *)pntsd) + acl_len; __u32 dacloffset; @@ -1208,7 +1208,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, group_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); dacloffset = le32_to_cpu(pntsd->dacloffset); - dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); cifs_dbg(NOISY, "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n", pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), le32_to_cpu(pntsd->gsidoffset), @@ -1259,14 +1259,14 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, __u32 sidsoffset; struct smb_sid *owner_sid_ptr, *group_sid_ptr; struct smb_sid *nowner_sid_ptr = NULL, *ngroup_sid_ptr = NULL; - struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */ - struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ + struct smb_acl *dacl_ptr = NULL; /* no need for SACL ptr */ + struct smb_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ char *end_of_acl = ((char *)pntsd) + secdesclen; u16 size = 0; dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { - dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (end_of_acl < (char *)dacl_ptr + le16_to_cpu(dacl_ptr->size)) { cifs_dbg(VFS, "Server returned illegal ACL size\n"); return -EINVAL; 
@@ -1280,7 +1280,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ ndacloffset = sizeof(struct smb_ntsd); - ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset); + ndacl_ptr = (struct smb_acl *)((char *)pnntsd + ndacloffset); ndacl_ptr->revision = dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION); @@ -1298,7 +1298,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, *aclflag |= CIFS_ACL_DACL; } else { ndacloffset = sizeof(struct smb_ntsd); - ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset); + ndacl_ptr = (struct smb_acl *)((char *)pnntsd + ndacloffset); ndacl_ptr->revision = dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION); ndacl_ptr->num_aces = dacl_ptr ? dacl_ptr->num_aces : 0; @@ -1580,7 +1580,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, __u32 secdesclen = 0; __u32 nsecdesclen = 0; __u32 dacloffset = 0; - struct cifs_acl *dacl_ptr = NULL; + struct smb_acl *dacl_ptr = NULL; struct smb_ntsd *pntsd = NULL; /* acl obtained from server */ struct smb_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -1633,7 +1633,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct smb_sid) * 2); dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { - dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (mode_from_sid) nsecdesclen += le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct cifs_ace); diff --git a/fs/smb/client/cifsacl.h b/fs/smb/client/cifsacl.h index 6a38718220fcb..a23d59987828d 100644 --- a/fs/smb/client/cifsacl.h +++ b/fs/smb/client/cifsacl.h @@ -34,7 +34,7 @@ * owner, group and world). 
*/ #define DEFAULT_SEC_DESC_LEN (sizeof(struct smb_ntsd) + \ - sizeof(struct cifs_acl) + \ + sizeof(struct smb_acl) + \ (sizeof(struct cifs_ace) * 4)) /* @@ -74,7 +74,7 @@ struct smb_sid { /* size of a struct smb_sid, sans sub_auth array */ #define CIFS_SID_BASE_SIZE (1 + 1 + NUM_AUTHS) -struct cifs_acl { +struct smb_acl { __le16 revision; /* revision level */ __le16 size; __le32 num_aces; From 51ca0b28eaff80d4af82fbccba8adf8ca7bda20a Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:58 +0000 Subject: [PATCH 008/216] smb/client: rename cifs_ace to smb_ace [ Upstream commit 09bedafc1e2c5c82aad3cbfe1359e2b0bf752f3a ] Preparation for moving acl definitions to new common header file. Use the following shell command to rename: find fs/smb/client -type f -exec sed -i \ 's/struct cifs_ace/struct smb_ace/g' {} + Signed-off-by: ChenXiaoSong Reviewed-by: Namjae Jeon Signed-off-by: Steve French Stable-dep-of: d413eabff18d ("fs/smb/client: implement chmod() for SMB3 POSIX Extensions") Signed-off-by: Sasha Levin (cherry picked from commit d64429042fef1dc9cdc64e8401c1955d70c2e56c) --- fs/smb/client/cifsacl.c | 62 +++++++++++++++++++-------------------- fs/smb/client/cifsacl.h | 4 +-- fs/smb/client/cifsglob.h | 2 +- fs/smb/client/cifsproto.h | 6 ++-- fs/smb/client/smb2pdu.c | 8 ++--- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 2e1c9b528dde6..e2ec1d9343354 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -666,7 +666,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, return; } -static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct smb_sid *psid) +static __u16 cifs_copy_ace(struct smb_ace *dst, struct smb_ace *src, struct smb_sid *psid) { __u16 size = 1 + 1 + 2 + 4; @@ -685,7 +685,7 @@ static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct sm return size; } -static __u16 fill_ace_for_sid(struct 
cifs_ace *pntace, +static __u16 fill_ace_for_sid(struct smb_ace *pntace, const struct smb_sid *psid, __u64 nmode, umode_t bits, __u8 access_type, bool allow_delete_child) @@ -723,7 +723,7 @@ static __u16 fill_ace_for_sid(struct cifs_ace *pntace, #ifdef CONFIG_CIFS_DEBUG2 -static void dump_ace(struct cifs_ace *pace, char *end_of_acl) +static void dump_ace(struct smb_ace *pace, char *end_of_acl) { int num_subauth; @@ -766,7 +766,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, int num_aces = 0; int acl_size; char *acl_base; - struct cifs_ace **ppace; + struct smb_ace **ppace; /* BB need to add parm so we can store the SID BB */ @@ -799,15 +799,15 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, if (num_aces > 0) { umode_t denied_mode = 0; - if (num_aces > ULONG_MAX / sizeof(struct cifs_ace *)) + if (num_aces > ULONG_MAX / sizeof(struct smb_ace *)) return; - ppace = kmalloc_array(num_aces, sizeof(struct cifs_ace *), + ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *), GFP_KERNEL); if (!ppace) return; for (i = 0; i < num_aces; ++i) { - ppace[i] = (struct cifs_ace *) (acl_base + acl_size); + ppace[i] = (struct smb_ace *) (acl_base + acl_size); #ifdef CONFIG_CIFS_DEBUG2 dump_ace(ppace[i], end_of_acl); #endif @@ -849,7 +849,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, /* memcpy((void *)(&(cifscred->aces[i])), (void *)ppace[i], - sizeof(struct cifs_ace)); */ + sizeof(struct smb_ace)); */ acl_base = (char *)ppace[i]; acl_size = le16_to_cpu(ppace[i]->size); @@ -861,7 +861,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl, return; } -unsigned int setup_authusers_ACE(struct cifs_ace *pntace) +unsigned int setup_authusers_ACE(struct smb_ace *pntace) { int i; unsigned int ace_size = 20; @@ -885,7 +885,7 @@ unsigned int setup_authusers_ACE(struct cifs_ace *pntace) * Fill in the special SID based on the mode. 
See * https://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx */ -unsigned int setup_special_mode_ACE(struct cifs_ace *pntace, __u64 nmode) +unsigned int setup_special_mode_ACE(struct smb_ace *pntace, __u64 nmode) { int i; unsigned int ace_size = 28; @@ -907,7 +907,7 @@ unsigned int setup_special_mode_ACE(struct cifs_ace *pntace, __u64 nmode) return ace_size; } -unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace) +unsigned int setup_special_user_owner_ACE(struct smb_ace *pntace) { int i; unsigned int ace_size = 28; @@ -944,17 +944,17 @@ static void populate_new_aces(char *nacl_base, __u64 deny_user_mode = 0; __u64 deny_group_mode = 0; bool sticky_set = false; - struct cifs_ace *pnntace = NULL; + struct smb_ace *pnntace = NULL; nmode = *pnmode; num_aces = *pnum_aces; nsize = *pnsize; if (modefromsid) { - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += setup_special_mode_ACE(pnntace, nmode); num_aces++; - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += setup_authusers_ACE(pnntace); num_aces++; goto set_size; @@ -992,7 +992,7 @@ static void populate_new_aces(char *nacl_base, sticky_set = true; if (deny_user_mode) { - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += fill_ace_for_sid(pnntace, pownersid, deny_user_mode, 0700, ACCESS_DENIED, false); num_aces++; @@ -1000,31 +1000,31 @@ static void populate_new_aces(char *nacl_base, /* Group DENY ACE does not conflict with owner ALLOW ACE. 
Keep in preferred order*/ if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) { - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false); num_aces++; } - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += fill_ace_for_sid(pnntace, pownersid, user_mode, 0700, ACCESS_ALLOWED, true); num_aces++; /* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */ if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) { - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false); num_aces++; } - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += fill_ace_for_sid(pnntace, pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set); num_aces++; - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += fill_ace_for_sid(pnntace, &sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set); num_aces++; @@ -1040,11 +1040,11 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p { int i; u16 size = 0; - struct cifs_ace *pntace = NULL; + struct smb_ace *pntace = NULL; char *acl_base = NULL; u32 src_num_aces = 0; u16 nsize = 0; - struct cifs_ace *pnntace = NULL; + struct smb_ace *pnntace = NULL; char *nacl_base = NULL; u16 ace_size = 0; @@ -1057,8 +1057,8 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p /* Go through all the ACEs */ for (i = 0; i < src_num_aces; ++i) { - pntace = (struct cifs_ace *) (acl_base + size); - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pntace = (struct smb_ace *) (acl_base + size); + pnntace = (struct smb_ace *) (nacl_base 
+ nsize); if (pnownersid && compare_sids(&pntace->sid, pownersid) == 0) ace_size = cifs_copy_ace(pnntace, pntace, pnownersid); @@ -1080,11 +1080,11 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, { int i; u16 size = 0; - struct cifs_ace *pntace = NULL; + struct smb_ace *pntace = NULL; char *acl_base = NULL; u32 src_num_aces = 0; u16 nsize = 0; - struct cifs_ace *pnntace = NULL; + struct smb_ace *pnntace = NULL; char *nacl_base = NULL; u32 num_aces = 0; bool new_aces_set = false; @@ -1108,7 +1108,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, /* Retain old ACEs which we can retain */ for (i = 0; i < src_num_aces; ++i) { - pntace = (struct cifs_ace *) (acl_base + size); + pntace = (struct smb_ace *) (acl_base + size); if (!new_aces_set && (pntace->flags & INHERITED_ACE)) { /* Place the new ACEs in between existing explicit and inherited */ @@ -1130,7 +1130,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, } /* update the pointer to the next ACE to populate*/ - pnntace = (struct cifs_ace *) (nacl_base + nsize); + pnntace = (struct smb_ace *) (nacl_base + nsize); nsize += cifs_copy_ace(pnntace, pntace, NULL); num_aces++; @@ -1625,9 +1625,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen = secdesclen; if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ if (mode_from_sid) - nsecdesclen += 2 * sizeof(struct cifs_ace); + nsecdesclen += 2 * sizeof(struct smb_ace); else /* cifsacl */ - nsecdesclen += 5 * sizeof(struct cifs_ace); + nsecdesclen += 5 * sizeof(struct smb_ace); } else { /* chown */ /* When ownership changes, changes new owner sid length could be different */ nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct smb_sid) * 2); @@ -1636,7 +1636,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); if (mode_from_sid) nsecdesclen += - 
le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct cifs_ace); + le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace); else /* cifsacl */ nsecdesclen += le16_to_cpu(dacl_ptr->size); } diff --git a/fs/smb/client/cifsacl.h b/fs/smb/client/cifsacl.h index a23d59987828d..cbaed8038e365 100644 --- a/fs/smb/client/cifsacl.h +++ b/fs/smb/client/cifsacl.h @@ -35,7 +35,7 @@ */ #define DEFAULT_SEC_DESC_LEN (sizeof(struct smb_ntsd) + \ sizeof(struct smb_acl) + \ - (sizeof(struct cifs_ace) * 4)) + (sizeof(struct smb_ace) * 4)) /* * Maximum size of a string representation of a SID: @@ -111,7 +111,7 @@ struct smb_acl { #define SUCCESSFUL_ACCESS_ACE_FLAG 0x40 #define FAILED_ACCESS_ACE_FLAG 0x80 -struct cifs_ace { +struct smb_ace { __u8 type; /* see above and MS-DTYP 2.4.4.1 */ __u8 flags; __le16 size; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 69d850b6b37fa..43b42eca6780c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -205,7 +205,7 @@ struct cifs_cred { struct smb_sid osid; struct smb_sid gsid; struct cifs_ntace *ntaces; - struct cifs_ace *aces; + struct smb_ace *aces; }; struct cifs_open_info_data { diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 059e506ccf5b3..6399dbd04625f 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -241,9 +241,9 @@ extern int cifs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int set_cifs_acl(struct smb_ntsd *pntsd, __u32 len, struct inode *ino, const char *path, int flag); -extern unsigned int setup_authusers_ACE(struct cifs_ace *pace); -extern unsigned int setup_special_mode_ACE(struct cifs_ace *pace, __u64 nmode); -extern unsigned int setup_special_user_owner_ACE(struct cifs_ace *pace); +extern unsigned int setup_authusers_ACE(struct smb_ace *pace); +extern unsigned int setup_special_mode_ACE(struct smb_ace *pace, __u64 nmode); +extern unsigned int setup_special_user_owner_ACE(struct smb_ace *pace); 
extern void dequeue_mid(struct mid_q_entry *mid, bool malformed); extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 42f950ae10fbd..101c80f22d776 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2623,7 +2623,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) unsigned int group_offset = 0; struct smb3_acl acl = {}; - *len = round_up(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8); + *len = round_up(sizeof(struct crt_sd_ctxt) + (sizeof(struct smb_ace) * 4), 8); if (set_owner) { /* sizeof(struct owner_group_sids) is already multiple of 8 so no need to round */ @@ -2672,21 +2672,21 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) ptr += sizeof(struct smb3_acl); /* create one ACE to hold the mode embedded in reserved special SID */ - acelen = setup_special_mode_ACE((struct cifs_ace *)ptr, (__u64)mode); + acelen = setup_special_mode_ACE((struct smb_ace *)ptr, (__u64)mode); ptr += acelen; acl_size = acelen + sizeof(struct smb3_acl); ace_count = 1; if (set_owner) { /* we do not need to reallocate buffer to add the two more ACEs. 
plenty of space */ - acelen = setup_special_user_owner_ACE((struct cifs_ace *)ptr); + acelen = setup_special_user_owner_ACE((struct smb_ace *)ptr); ptr += acelen; acl_size += acelen; ace_count += 1; } /* and one more ACE to allow access for authenticated users */ - acelen = setup_authusers_ACE((struct cifs_ace *)ptr); + acelen = setup_authusers_ACE((struct smb_ace *)ptr); ptr += acelen; acl_size += acelen; ace_count += 1; From 7f6f054df9e25f4556ee5bd62affb4432a4e3caf Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Thu, 14 Nov 2024 11:05:13 +0100 Subject: [PATCH 009/216] fs/smb/client: implement chmod() for SMB3 POSIX Extensions [ Upstream commit d413eabff18d640031fc955d107ad9c03c3bf9f1 ] The NT ACL format for an SMB3 POSIX Extensions chmod() is a single ACE with the magic S-1-5-88-3-mode SID: NT Security Descriptor Revision: 1 Type: 0x8004, Self Relative, DACL Present Offset to owner SID: 56 Offset to group SID: 124 Offset to SACL: 0 Offset to DACL: 20 Owner: S-1-5-21-3177838999-3893657415-1037673384-1000 Group: S-1-22-2-1000 NT User (DACL) ACL Revision: NT4 (2) Size: 36 Num ACEs: 1 NT ACE: S-1-5-88-3-438, flags 0x00, Access Allowed, mask 0x00000000 Type: Access Allowed NT ACE Flags: 0x00 Size: 28 Access required: 0x00000000 SID: S-1-5-88-3-438 Owner and Group should be NULL, but the server is not required to fail the request if they are present. 
Signed-off-by: Ralph Boehme Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Sasha Levin (cherry picked from commit 5f36890d650ca9fb02072ea42f7699b8ffdff75e) --- fs/smb/client/cifsacl.c | 50 +++++++++++++++++++++++---------------- fs/smb/client/cifsproto.h | 4 +++- fs/smb/client/inode.c | 4 +++- fs/smb/client/smb2pdu.c | 2 +- 4 files changed, 37 insertions(+), 23 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index e2ec1d9343354..bff8d0dd74fe7 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -885,12 +885,17 @@ unsigned int setup_authusers_ACE(struct smb_ace *pntace) * Fill in the special SID based on the mode. See * https://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx */ -unsigned int setup_special_mode_ACE(struct smb_ace *pntace, __u64 nmode) +unsigned int setup_special_mode_ACE(struct smb_ace *pntace, + bool posix, + __u64 nmode) { int i; unsigned int ace_size = 28; - pntace->type = ACCESS_DENIED_ACE_TYPE; + if (posix) + pntace->type = ACCESS_ALLOWED_ACE_TYPE; + else + pntace->type = ACCESS_DENIED_ACE_TYPE; pntace->flags = 0x0; pntace->access_req = 0; pntace->sid.num_subauth = 3; @@ -933,7 +938,8 @@ static void populate_new_aces(char *nacl_base, struct smb_sid *pownersid, struct smb_sid *pgrpsid, __u64 *pnmode, u32 *pnum_aces, u16 *pnsize, - bool modefromsid) + bool modefromsid, + bool posix) { __u64 nmode; u32 num_aces = 0; @@ -950,13 +956,15 @@ static void populate_new_aces(char *nacl_base, num_aces = *pnum_aces; nsize = *pnsize; - if (modefromsid) { - pnntace = (struct smb_ace *) (nacl_base + nsize); - nsize += setup_special_mode_ACE(pnntace, nmode); - num_aces++; + if (modefromsid || posix) { pnntace = (struct smb_ace *) (nacl_base + nsize); - nsize += setup_authusers_ACE(pnntace); + nsize += setup_special_mode_ACE(pnntace, posix, nmode); num_aces++; + if (modefromsid) { + pnntace = (struct smb_ace *) (nacl_base + nsize); + nsize += setup_authusers_ACE(pnntace); + num_aces++; 
+ } goto set_size; } @@ -1076,7 +1084,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, struct smb_sid *pownersid, struct smb_sid *pgrpsid, - __u64 *pnmode, bool mode_from_sid) + __u64 *pnmode, bool mode_from_sid, bool posix) { int i; u16 size = 0; @@ -1094,11 +1102,11 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, nsize = sizeof(struct smb_acl); /* If pdacl is NULL, we don't have a src. Simply populate new ACL. */ - if (!pdacl) { + if (!pdacl || posix) { populate_new_aces(nacl_base, pownersid, pgrpsid, pnmode, &num_aces, &nsize, - mode_from_sid); + mode_from_sid, posix); goto finalize_dacl; } @@ -1115,7 +1123,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, populate_new_aces(nacl_base, pownersid, pgrpsid, pnmode, &num_aces, &nsize, - mode_from_sid); + mode_from_sid, posix); new_aces_set = true; } @@ -1144,7 +1152,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl, populate_new_aces(nacl_base, pownersid, pgrpsid, pnmode, &num_aces, &nsize, - mode_from_sid); + mode_from_sid, posix); new_aces_set = true; } @@ -1251,7 +1259,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, /* Convert permission bits from mode to equivalent CIFS ACL */ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, __u32 secdesclen, __u32 *pnsecdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid, - bool mode_from_sid, bool id_from_sid, int *aclflag) + bool mode_from_sid, bool id_from_sid, bool posix, int *aclflag) { int rc = 0; __u32 dacloffset; @@ -1288,7 +1296,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, ndacl_ptr->num_aces = cpu_to_le32(0); rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr, - pnmode, mode_from_sid); + pnmode, mode_from_sid, posix); sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size); /* copy the 
non-dacl portion of secdesc */ @@ -1587,6 +1595,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct smb_version_operations *ops; bool mode_from_sid, id_from_sid; + bool posix = tlink_tcon(tlink)->posix_extensions; const u32 info = 0; if (IS_ERR(tlink)) @@ -1622,12 +1631,13 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, id_from_sid = false; /* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */ - nsecdesclen = secdesclen; if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ - if (mode_from_sid) - nsecdesclen += 2 * sizeof(struct smb_ace); + if (posix) + nsecdesclen = 1 * sizeof(struct smb_ace); + else if (mode_from_sid) + nsecdesclen = secdesclen + (2 * sizeof(struct smb_ace)); else /* cifsacl */ - nsecdesclen += 5 * sizeof(struct smb_ace); + nsecdesclen = secdesclen + (5 * sizeof(struct smb_ace)); } else { /* chown */ /* When ownership changes, changes new owner sid length could be different */ nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct smb_sid) * 2); @@ -1657,7 +1667,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, } rc = build_sec_desc(pntsd, pnntsd, secdesclen, &nsecdesclen, pnmode, uid, gid, - mode_from_sid, id_from_sid, &aclflag); + mode_from_sid, id_from_sid, posix, &aclflag); cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc); diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 6399dbd04625f..a151ffffc6f38 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -242,7 +242,9 @@ extern int cifs_set_acl(struct mnt_idmap *idmap, extern int set_cifs_acl(struct smb_ntsd *pntsd, __u32 len, struct inode *ino, const char *path, int flag); extern unsigned int setup_authusers_ACE(struct smb_ace *pace); -extern unsigned int setup_special_mode_ACE(struct smb_ace *pace, __u64 nmode); +extern unsigned int setup_special_mode_ACE(struct smb_ace *pace, + bool posix, + 
__u64 nmode); extern unsigned int setup_special_user_owner_ACE(struct smb_ace *pace); extern void dequeue_mid(struct mid_q_entry *mid, bool malformed); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index ce7e0aed8f7d2..b3e59a7c71205 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -3087,6 +3087,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) int rc = -EACCES; __u32 dosattr = 0; __u64 mode = NO_CHANGE_64; + bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions; xid = get_xid(); @@ -3177,7 +3178,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) mode = attrs->ia_mode; rc = 0; if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) { + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) || + posix) { rc = id_mode_to_cifs_acl(inode, full_path, &mode, INVALID_UID, INVALID_GID); if (rc) { diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 101c80f22d776..c012fbc2638ed 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2672,7 +2672,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) ptr += sizeof(struct smb3_acl); /* create one ACE to hold the mode embedded in reserved special SID */ - acelen = setup_special_mode_ACE((struct smb_ace *)ptr, (__u64)mode); + acelen = setup_special_mode_ACE((struct smb_ace *)ptr, false, (__u64)mode); ptr += acelen; acl_size = acelen + sizeof(struct smb3_acl); ace_count = 1; From 26d53c9cefad634f16b3fcd1bbb6bfb2648befe3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 18 Sep 2024 02:04:01 -0300 Subject: [PATCH 010/216] smb: client: stop flooding dmesg in smb2_calc_signature() [ Upstream commit a13ca780afab350f37f8be9eda2bf79d1aed9bdd ] When having several mounts that share same credential and the client couldn't re-establish an SMB session due to an expired kerberos ticket or rotated password, smb2_calc_signature() will end up flooding dmesg when not 
finding SMB sessions to calculate signatures. Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Stable-dep-of: 343d7fe6df9e ("smb: client: fix use-after-free of signing key") Signed-off-by: Sasha Levin (cherry picked from commit d7cb986425ce28ccd571216e8041774939337035) --- fs/smb/client/smb2transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 4ca04e62a993c..73eae1b160349 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -242,7 +242,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, ses = smb2_find_smb_ses(server, le64_to_cpu(shdr->SessionId)); if (unlikely(!ses)) { - cifs_server_dbg(VFS, "%s: Could not find session\n", __func__); + cifs_server_dbg(FYI, "%s: Could not find session\n", __func__); return -ENOENT; } From 9610d3dea10889205c708264d1a40657a88fd653 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Nov 2024 10:40:55 -0300 Subject: [PATCH 011/216] smb: client: fix use-after-free of signing key [ Upstream commit 343d7fe6df9e247671440a932b6a73af4fa86d95 ] Customers have reported use-after-free in @ses->auth_key.response with SMB2.1 + sign mounts which occurs due to following race: task A task B cifs_mount() dfs_mount_share() get_session() cifs_mount_get_session() cifs_send_recv() cifs_get_smb_ses() compound_send_recv() cifs_setup_session() smb2_setup_request() kfree_sensitive() smb2_calc_signature() crypto_shash_setkey() *UAF* Fix this by ensuring that we have a valid @ses->auth_key.response by checking whether @ses->ses_status is SES_GOOD or SES_EXITING with @ses->ses_lock held. After commit 24a9799aa8ef ("smb: client: fix UAF in smb2_reconnect_server()"), we made sure to call ->logoff() only when @ses was known to be good (e.g. valid ->auth_key.response), so it's safe to access signing key when @ses->ses_status == SES_EXITING. 
Cc: stable@vger.kernel.org Reported-by: Jay Shin Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin (cherry picked from commit 39619c65ab4bbb3e78c818f537687653e112764d) --- fs/smb/client/smb2proto.h | 2 -- fs/smb/client/smb2transport.c | 56 +++++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index fd90d8e5a1d16..750e4e397b139 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -37,8 +37,6 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst); extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); -extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server, - __u64 ses_id); extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); extern int smb2_calc_signature(struct smb_rqst *rqst, diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 73eae1b160349..4a43802375b3a 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -74,7 +74,7 @@ smb311_crypto_shash_allocate(struct TCP_Server_Info *server) static -int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) +int smb3_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; struct TCP_Server_Info *pserver; @@ -168,16 +168,41 @@ smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) return NULL; } -struct cifs_ses * -smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id) +static int smb2_get_sign_key(struct TCP_Server_Info *server, + __u64 ses_id, u8 *key) { struct cifs_ses *ses; + int rc = -ENOENT; + + if (SERVER_IS_CHAN(server)) + server = server->primary_server; spin_lock(&cifs_tcp_ses_lock); - ses = smb2_find_smb_ses_unlocked(server, ses_id); - 
spin_unlock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->Suid != ses_id) + continue; - return ses; + rc = 0; + spin_lock(&ses->ses_lock); + switch (ses->ses_status) { + case SES_EXITING: /* SMB2_LOGOFF */ + case SES_GOOD: + if (likely(ses->auth_key.response)) { + memcpy(key, ses->auth_key.response, + SMB2_NTLMV2_SESSKEY_SIZE); + } else { + rc = -EIO; + } + break; + default: + rc = -EAGAIN; + break; + } + spin_unlock(&ses->ses_lock); + break; + } + spin_unlock(&cifs_tcp_ses_lock); + return rc; } static struct cifs_tcon * @@ -236,14 +261,16 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, unsigned char *sigptr = smb2_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; - struct cifs_ses *ses; struct shash_desc *shash = NULL; struct smb_rqst drqst; + __u64 sid = le64_to_cpu(shdr->SessionId); + u8 key[SMB2_NTLMV2_SESSKEY_SIZE]; - ses = smb2_find_smb_ses(server, le64_to_cpu(shdr->SessionId)); - if (unlikely(!ses)) { - cifs_server_dbg(FYI, "%s: Could not find session\n", __func__); - return -ENOENT; + rc = smb2_get_sign_key(server, sid, key); + if (unlikely(rc)) { + cifs_server_dbg(FYI, "%s: [sesid=0x%llx] couldn't find signing key: %d\n", + __func__, sid, rc); + return rc; } memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); @@ -260,8 +287,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, shash = server->secmech.hmacsha256; } - rc = crypto_shash_setkey(shash->tfm, ses->auth_key.response, - SMB2_NTLMV2_SESSKEY_SIZE); + rc = crypto_shash_setkey(shash->tfm, key, sizeof(key)); if (rc) { cifs_server_dbg(VFS, "%s: Could not update with response\n", @@ -303,8 +329,6 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, out: if (allocate_crypto) cifs_free_hash(&shash); - if (ses) - cifs_put_smb_ses(ses); return rc; } @@ -570,7 +594,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct 
TCP_Server_Info *server, struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; - rc = smb2_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); + rc = smb3_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); if (unlikely(rc)) { cifs_server_dbg(FYI, "%s: Could not get signing key\n", __func__); return rc; From b1921671149d29d7939e1c43c3b8bbbd45c53ab8 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Tue, 12 Nov 2024 10:18:02 +0530 Subject: [PATCH 012/216] usb: dwc3: gadget: Add missing check for single port RAM in TxFIFO resizing logic [ Upstream commit 61eb055cd3048ee01ca43d1be924167d33e16fdc ] The existing implementation of the TxFIFO resizing logic only supports scenarios where more than one port RAM is used. However, there is a need to resize the TxFIFO in USB2.0-only mode where only a single port RAM is available. This commit introduces the necessary changes to support TxFIFO resizing in such scenarios by adding a missing check for single port RAM. This fix addresses certain platform configurations where the existing TxFIFO resizing logic does not work properly due to the absence of support for single port RAM. By adding this missing check, we ensure that the TxFIFO resizing logic works correctly in all scenarios, including those with a single port RAM. 
Fixes: 9f607a309fbe ("usb: dwc3: Resize TX FIFOs to meet EP bursting requirements") Cc: stable@vger.kernel.org # 6.12.x: fad16c82: usb: dwc3: gadget: Refine the logic for resizing Tx FIFOs Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20241112044807.623-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (cherry picked from commit 106740e978c716c755c0aeb12efb2180fa5ee962) --- drivers/usb/dwc3/core.h | 4 +++ drivers/usb/dwc3/gadget.c | 54 +++++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b118f4aab1898..d00bf714a7ccf 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -899,6 +899,7 @@ struct dwc3_hwparams { #define DWC3_MODE(n) ((n) & 0x7) /* HWPARAMS1 */ +#define DWC3_SPRAM_TYPE(n) (((n) >> 23) & 1) #define DWC3_NUM_INT(n) (((n) & (0x3f << 15)) >> 15) /* HWPARAMS3 */ @@ -909,6 +910,9 @@ struct dwc3_hwparams { #define DWC3_NUM_IN_EPS(p) (((p)->hwparams3 & \ (DWC3_NUM_IN_EPS_MASK)) >> 18) +/* HWPARAMS6 */ +#define DWC3_RAM0_DEPTH(n) (((n) & (0xffff0000)) >> 16) + /* HWPARAMS7 */ #define DWC3_RAM1_DEPTH(n) ((n) & 0xffff) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b560996bd4218..656460c0c1dd7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -687,6 +687,44 @@ static int dwc3_gadget_calc_tx_fifo_size(struct dwc3 *dwc, int mult) return fifo_size; } +/** + * dwc3_gadget_calc_ram_depth - calculates the ram depth for txfifo + * @dwc: pointer to the DWC3 context + */ +static int dwc3_gadget_calc_ram_depth(struct dwc3 *dwc) +{ + int ram_depth; + int fifo_0_start; + bool is_single_port_ram; + + /* Check supporting RAM type by HW */ + is_single_port_ram = DWC3_SPRAM_TYPE(dwc->hwparams.hwparams1); + + /* + * If a single port RAM is utilized, then allocate TxFIFOs from + * RAM0. otherwise, allocate them from RAM1. 
+ */ + ram_depth = is_single_port_ram ? DWC3_RAM0_DEPTH(dwc->hwparams.hwparams6) : + DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7); + + /* + * In a single port RAM configuration, the available RAM is shared + * between the RX and TX FIFOs. This means that the txfifo can begin + * at a non-zero address. + */ + if (is_single_port_ram) { + u32 reg; + + /* Check if TXFIFOs start at non-zero addr */ + reg = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0)); + fifo_0_start = DWC3_GTXFIFOSIZ_TXFSTADDR(reg); + + ram_depth -= (fifo_0_start >> 16); + } + + return ram_depth; +} + /** * dwc3_gadget_clear_tx_fifos - Clears txfifo allocation * @dwc: pointer to the DWC3 context @@ -753,7 +791,7 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) { struct dwc3 *dwc = dep->dwc; int fifo_0_start; - int ram1_depth; + int ram_depth; int fifo_size; int min_depth; int num_in_ep; @@ -773,7 +811,7 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) if (dep->flags & DWC3_EP_TXFIFO_RESIZED) return 0; - ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7); + ram_depth = dwc3_gadget_calc_ram_depth(dwc); if ((dep->endpoint.maxburst > 1 && usb_endpoint_xfer_bulk(dep->endpoint.desc)) || @@ -794,7 +832,7 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) /* Reserve at least one FIFO for the number of IN EPs */ min_depth = num_in_ep * (fifo + 1); - remaining = ram1_depth - min_depth - dwc->last_fifo_depth; + remaining = ram_depth - min_depth - dwc->last_fifo_depth; remaining = max_t(int, 0, remaining); /* * We've already reserved 1 FIFO per EP, so check what we can fit in @@ -820,9 +858,9 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) dwc->last_fifo_depth += DWC31_GTXFIFOSIZ_TXFDEP(fifo_size); /* Check fifo size allocation doesn't exceed available RAM size. 
*/ - if (dwc->last_fifo_depth >= ram1_depth) { + if (dwc->last_fifo_depth >= ram_depth) { dev_err(dwc->dev, "Fifosize(%d) > RAM size(%d) %s depth:%d\n", - dwc->last_fifo_depth, ram1_depth, + dwc->last_fifo_depth, ram_depth, dep->endpoint.name, fifo_size); if (DWC3_IP_IS(DWC3)) fifo_size = DWC3_GTXFIFOSIZ_TXFDEP(fifo_size); @@ -3078,7 +3116,7 @@ static int dwc3_gadget_check_config(struct usb_gadget *g) struct dwc3 *dwc = gadget_to_dwc(g); struct usb_ep *ep; int fifo_size = 0; - int ram1_depth; + int ram_depth; int ep_num = 0; if (!dwc->do_fifo_resize) @@ -3101,8 +3139,8 @@ static int dwc3_gadget_check_config(struct usb_gadget *g) fifo_size += dwc->max_cfg_eps; /* Check if we can fit a single fifo per endpoint */ - ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7); - if (fifo_size > ram1_depth) + ram_depth = dwc3_gadget_calc_ram_depth(dwc); + if (fifo_size > ram_depth) return -ENOMEM; return 0; From e37b0870a88ea1434c7b7696c659955a475356e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Oct 2024 11:43:42 +0100 Subject: [PATCH 013/216] sched: Initialize idle tasks only once [ Upstream commit b23decf8ac9102fc52c4de5196f4dc0a5f3eb80b ] Idle tasks are initialized via __sched_fork() twice: fork_idle() copy_process() sched_fork() __sched_fork() init_idle() __sched_fork() Instead of cleaning this up, sched_ext hacked around it. Even when analysis and solution were provided in a discussion, nobody cared to clean this up. init_idle() is also invoked from sched_init() to initialize the boot CPU's idle task, which requires the __sched_fork() invocation. But this can be trivially solved by invoking __sched_fork() before init_idle() in sched_init() and removing the __sched_fork() invocation from init_idle(). Do so and clean up the comments explaining this historical leftover. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241028103142.359584747@linutronix.de Signed-off-by: Sasha Levin (cherry picked from commit 3adf89f17dbdac2e12eec31654eea93d0b016811) --- kernel/sched/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 475fd87cea5f6..d5e3262a5dd48 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4488,7 +4488,8 @@ int wake_up_state(struct task_struct *p, unsigned int state) * Perform scheduler related setup for a newly forked process p. * p is forked by current. * - * __sched_fork() is basic setup used by init_idle() too: + * __sched_fork() is basic setup which is also used by sched_init() to + * initialize the boot CPU's idle task. */ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) { @@ -9267,8 +9268,6 @@ void __init init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; - __sched_fork(0, idle); - raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_rq_lock(rq); @@ -9283,10 +9282,8 @@ void __init init_idle(struct task_struct *idle, int cpu) #ifdef CONFIG_SMP /* - * It's possible that init_idle() gets called multiple times on a task, - * in that case do_set_cpus_allowed() will not do the right thing. - * - * And since this is boot we can forgo the serialization. + * No validation and serialization required at boot time and for + * setting up the idle tasks of not yet online CPUs. */ set_cpus_allowed_common(idle, &ac); #endif @@ -10115,6 +10112,7 @@ void __init sched_init(void) * but because we are the idle thread, we just pick up running again * when this runqueue becomes "idle". 
*/ + __sched_fork(0, current); init_idle(current, smp_processor_id()); calc_load_update = jiffies + LOAD_FREQ; From 236b206287a3f870badcb260cb93dc0cb89630a2 Mon Sep 17 00:00:00 2001 From: Liam Ni Date: Thu, 26 Oct 2023 10:03:29 +0800 Subject: [PATCH 014/216] NUMA: optimize detection of memory with no node id assigned by firmware [ Upstream commit ff6c3d81f2e86b63a3a530683f89ef393882782a ] Sanity check that makes sure the nodes cover all memory loops over numa_meminfo to count the pages that have node id assigned by the firmware, then loops again over memblock.memory to find the total amount of memory and in the end checks that the difference between the total memory and memory that is covered by nodes is less than some threshold. Worse, the loop over numa_meminfo calls __absent_pages_in_range() that also partially traverses memblock.memory. It's much simpler and more efficient to have a single traversal of memblock.memory that verifies that the amount of memory not covered by nodes is less than a threshold. Introduce memblock_validate_numa_coverage() that does exactly that and use it instead of numa_meminfo_cover_memory(). Link: https://lkml.kernel.org/r/20231026020329.327329-1-zhiguangni01@gmail.com Signed-off-by: Liam Ni Reviewed-by: Mike Rapoport (IBM) Cc: Andy Lutomirski Cc: Bibo Mao Cc: Binbin Zhou Cc: Borislav Petkov Cc: Dave Hansen Cc: Feiyang Chen Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: WANG Xuerui Signed-off-by: Andrew Morton Stable-dep-of: 9cdc6423acb4 ("memblock: allow zero threshold in validate_numa_converage()") Signed-off-by: Sasha Levin (cherry picked from commit 6fdc770506eb8379bf68a49d4e193c8364ac64e0) --- arch/loongarch/kernel/numa.c | 28 +--------------------------- arch/x86/mm/numa.c | 34 ++-------------------------------- include/linux/memblock.h | 1 + mm/memblock.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 59 deletions(-) diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 618c2d70079cb..5d45c389e9612 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -228,32 +228,6 @@ static void __init node_mem_init(unsigned int node) #ifdef CONFIG_ACPI_NUMA -/* - * Sanity check to catch more bad NUMA configurations (they are amazingly - * common). Make sure the nodes cover all memory. - */ -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) -{ - int i; - u64 numaram, biosram; - - numaram = 0; - for (i = 0; i < mi->nr_blks; i++) { - u64 s = mi->blk[i].start >> PAGE_SHIFT; - u64 e = mi->blk[i].end >> PAGE_SHIFT; - - numaram += e - s; - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); - if ((s64)numaram < 0) - numaram = 0; - } - max_pfn = max_low_pfn; - biosram = max_pfn - absent_pages_in_range(0, max_pfn); - - BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT))); - return true; -} - static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type) { static unsigned long num_physpages; @@ -399,7 +373,7 @@ int __init init_numa_memory(void) init_node_memblock(); bpi_init_node_memblock(add_numamem_region); - if (numa_meminfo_cover_memory(&numa_meminfo) == false) + if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; for_each_node_mask(node, node_possible_map) { diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c 
index c7fa5396c0f05..2c67bfc3cf320 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -448,37 +448,6 @@ int __node_distance(int from, int to) } EXPORT_SYMBOL(__node_distance); -/* - * Sanity check to catch more bad NUMA configurations (they are amazingly - * common). Make sure the nodes cover all memory. - */ -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) -{ - u64 numaram, e820ram; - int i; - - numaram = 0; - for (i = 0; i < mi->nr_blks; i++) { - u64 s = mi->blk[i].start >> PAGE_SHIFT; - u64 e = mi->blk[i].end >> PAGE_SHIFT; - numaram += e - s; - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); - if ((s64)numaram < 0) - numaram = 0; - } - - e820ram = max_pfn - absent_pages_in_range(0, max_pfn); - - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", - (numaram << PAGE_SHIFT) >> 20, - (e820ram << PAGE_SHIFT) >> 20); - return false; - } - return true; -} - /* * Mark all currently memblock-reserved physical memory (which covers the * kernel's own memory ranges) as hot-unswappable. @@ -584,7 +553,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) return -EINVAL; } } - if (!numa_meminfo_cover_memory(mi)) + + if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; /* Finally register nodes. 
*/ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ed57c23f80ac2..ed64240041e85 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -122,6 +122,7 @@ unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); +bool memblock_validate_numa_coverage(unsigned long threshold_bytes); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index d630f5c2bdb90..3a3ab73546f54 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -735,6 +735,40 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); } +/** + * memblock_validate_numa_coverage - check if amount of memory with + * no node ID assigned is less than a threshold + * @threshold_bytes: maximal number of pages that can have unassigned node + * ID (in bytes). + * + * A buggy firmware may report memory that does not belong to any node. + * Check if amount of such memory is below @threshold_bytes. + * + * Return: true on success, false on failure. 
+ */ +bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes) +{ + unsigned long nr_pages = 0; + unsigned long start_pfn, end_pfn, mem_size_mb; + int nid, i; + + /* calculate lose page */ + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + if (nid == NUMA_NO_NODE) + nr_pages += end_pfn - start_pfn; + } + + if ((nr_pages << PAGE_SHIFT) >= threshold_bytes) { + mem_size_mb = memblock_phys_mem_size() >> 20; + pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", + (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); + return false; + } + + return true; +} + + /** * memblock_isolate_range - isolate given range into disjoint memblocks * @type: memblock type to isolate range for From 8a43685ae0cac3878f036aaf46e5ca0b25f04be6 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Fri, 29 Nov 2024 11:13:47 +0200 Subject: [PATCH 015/216] memblock: allow zero threshold in validate_numa_converage() [ Upstream commit 9cdc6423acb49055efb444ecd895d853a70ef931 ] Currently memblock validate_numa_converage() returns false negative when threshold set to zero. Make the check if the memory size with invalid node ID is greater than the threshold exclusive to fix that. Link: https://lore.kernel.org/all/Z0mIDBD4KLyxyOCm@kernel.org/ Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Sasha Levin (cherry picked from commit 1864d4712c4b3b46a23ddddfbf5d3399b50ae161) --- mm/memblock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 3a3ab73546f54..87a2b4340ce4e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -738,7 +738,7 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) /** * memblock_validate_numa_coverage - check if amount of memory with * no node ID assigned is less than a threshold - * @threshold_bytes: maximal number of pages that can have unassigned node + * @threshold_bytes: maximal memory size that can have unassigned node * ID (in bytes). 
* * A buggy firmware may report memory that does not belong to any node. @@ -758,7 +758,7 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt nr_pages += end_pfn - start_pfn; } - if ((nr_pages << PAGE_SHIFT) >= threshold_bytes) { + if ((nr_pages << PAGE_SHIFT) > threshold_bytes) { mem_size_mb = memblock_phys_mem_size() >> 20; pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); From 207eeb0520f00e8bd55a84efd25ebb29b15efd06 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 4 Oct 2023 14:52:20 -0400 Subject: [PATCH 016/216] ext4: convert to new timestamp accessors [ Upstream commit b898ab233611f7903d88c0b10f8145e1c15d3642 ] Convert to using the new inode timestamp accessor functions. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20231004185347.80880-33-jlayton@kernel.org Signed-off-by: Christian Brauner Stable-dep-of: c7fc0366c656 ("ext4: partial zero eof block on unaligned inode size extension") Signed-off-by: Sasha Levin (cherry picked from commit fa42d5f1327f72a2034d3d82e6d78597e920f350) --- fs/ext4/ext4.h | 20 +++++++++++++++----- fs/ext4/extents.c | 11 ++++++----- fs/ext4/ialloc.c | 4 ++-- fs/ext4/inline.c | 4 ++-- fs/ext4/inode.c | 19 ++++++++++--------- fs/ext4/ioctl.c | 13 +++++++++++-- fs/ext4/namei.c | 10 +++++----- fs/ext4/super.c | 2 +- fs/ext4/xattr.c | 8 ++++---- 9 files changed, 56 insertions(+), 35 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3db01b933c3e8..60455c84a9374 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -891,10 +891,13 @@ do { \ (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (ts).tv_sec, S32_MIN, S32_MAX)); \ } while (0) -#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ - EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, (inode)->xtime) +#define EXT4_INODE_SET_ATIME(inode, raw_inode) \ + EXT4_INODE_SET_XTIME_VAL(i_atime, inode, raw_inode, inode_get_atime(inode)) -#define EXT4_INODE_SET_CTIME(inode, 
raw_inode) \ +#define EXT4_INODE_SET_MTIME(inode, raw_inode) \ + EXT4_INODE_SET_XTIME_VAL(i_mtime, inode, raw_inode, inode_get_mtime(inode)) + +#define EXT4_INODE_SET_CTIME(inode, raw_inode) \ EXT4_INODE_SET_XTIME_VAL(i_ctime, inode, raw_inode, inode_get_ctime(inode)) #define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ @@ -910,9 +913,16 @@ do { \ .tv_sec = (signed)le32_to_cpu((raw_inode)->xtime) \ }) -#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +#define EXT4_INODE_GET_ATIME(inode, raw_inode) \ +do { \ + inode_set_atime_to_ts(inode, \ + EXT4_INODE_GET_XTIME_VAL(i_atime, inode, raw_inode)); \ +} while (0) + +#define EXT4_INODE_GET_MTIME(inode, raw_inode) \ do { \ - (inode)->xtime = EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode); \ + inode_set_mtime_to_ts(inode, \ + EXT4_INODE_GET_XTIME_VAL(i_mtime, inode, raw_inode)); \ } while (0) #define EXT4_INODE_GET_CTIME(inode, raw_inode) \ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c1b0edd7ea4fa..efa7e274ac4b3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4532,7 +4532,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, if (epos > new_size) epos = new_size; if (ext4_update_inode_size(inode, epos) & 0x1) - inode->i_mtime = inode_get_ctime(inode); + inode_set_mtime_to_ts(inode, + inode_get_ctime(inode)); } ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -4670,7 +4671,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); @@ -4695,7 +4696,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, goto out_mutex; } - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, 
inode_set_ctime_current(inode)); if (new_size) ext4_update_inode_size(inode, new_size); ret = ext4_mark_inode_dirty(handle, inode); @@ -5431,7 +5432,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -5541,7 +5542,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; EXT4_I(inode)->i_disksize += len; - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_mark_inode_dirty(handle, inode); if (ret) goto out_stop; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index d4d0ad689d3c1..52f2959d29e6e 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1255,8 +1255,8 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); - ei->i_crtime = inode->i_mtime; + simple_inode_init_ts(inode); + ei->i_crtime = inode_get_mtime(inode); memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index cb65052ee3dec..3f363276ddd36 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1037,7 +1037,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, * happen is that the times are slightly out of date * and/or different from the directory change time. 
*/ - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); inode_inc_iversion(dir); return 1; @@ -2010,7 +2010,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) ext4_orphan_del(handle, inode); if (err == 0) { - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); err = ext4_mark_inode_dirty(handle, inode); if (IS_SYNC(inode)) ext4_handle_sync(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cd17ac2974b08..15e0dd22658c2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4055,7 +4055,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) if (IS_SYNC(inode)) ext4_handle_sync(handle); - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret2 = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret2)) ret = ret2; @@ -4215,7 +4215,7 @@ int ext4_truncate(struct inode *inode) if (inode->i_nlink) ext4_orphan_del(handle, inode); - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); err2 = ext4_mark_inode_dirty(handle, inode); if (unlikely(err2 && !err)) err = err2; @@ -4319,8 +4319,8 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); EXT4_INODE_SET_CTIME(inode, raw_inode); - EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); - EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + EXT4_INODE_SET_MTIME(inode, raw_inode); + EXT4_INODE_SET_ATIME(inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); @@ -4928,8 +4928,8 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, } EXT4_INODE_GET_CTIME(inode, raw_inode); - EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); - EXT4_INODE_GET_XTIME(i_atime, inode, 
raw_inode); + EXT4_INODE_GET_ATIME(inode, raw_inode); + EXT4_INODE_GET_MTIME(inode, raw_inode); EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { @@ -5054,8 +5054,8 @@ static void __ext4_update_other_inode_time(struct super_block *sb, spin_lock(&ei->i_raw_lock); EXT4_INODE_SET_CTIME(inode, raw_inode); - EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); - EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + EXT4_INODE_SET_MTIME(inode, raw_inode); + EXT4_INODE_SET_ATIME(inode, raw_inode); ext4_inode_csum_set(inode, raw_inode, ei); spin_unlock(&ei->i_raw_lock); trace_ext4_other_inode_update_time(inode, orig_ino); @@ -5451,7 +5451,8 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, * update c/mtime in shrink case below */ if (!shrink) - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, + inode_set_ctime_current(inode)); if (shrink) ext4_fc_track_range(handle, inode, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 0bfe2ce589e22..4f931f80cb348 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -312,13 +312,22 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) struct ext4_inode_info *ei1; struct ext4_inode_info *ei2; unsigned long tmp; + struct timespec64 ts1, ts2; ei1 = EXT4_I(inode1); ei2 = EXT4_I(inode2); swap(inode1->i_version, inode2->i_version); - swap(inode1->i_atime, inode2->i_atime); - swap(inode1->i_mtime, inode2->i_mtime); + + ts1 = inode_get_atime(inode1); + ts2 = inode_get_atime(inode2); + inode_set_atime_to_ts(inode1, ts2); + inode_set_atime_to_ts(inode2, ts1); + + ts1 = inode_get_mtime(inode1); + ts2 = inode_get_mtime(inode2); + inode_set_mtime_to_ts(inode1, ts2); + inode_set_mtime_to_ts(inode2, ts1); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4de1f61bba76b..96a048d3f51bf 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ 
-2210,7 +2210,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); inode_inc_iversion(dir); err2 = ext4_mark_inode_dirty(handle, dir); @@ -3248,7 +3248,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) * recovery. */ inode->i_size = 0; ext4_orphan_add(handle, inode); - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_set_ctime_current(inode); retval = ext4_mark_inode_dirty(handle, inode); if (retval) @@ -3323,7 +3323,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name, retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto out_handle; - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); retval = ext4_mark_inode_dirty(handle, dir); if (retval) @@ -3691,7 +3691,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; inode_inc_iversion(ent->dir); - ent->dir->i_mtime = inode_set_ctime_current(ent->dir); + inode_set_mtime_to_ts(ent->dir, inode_set_ctime_current(ent->dir)); retval = ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { @@ -4006,7 +4006,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, ext4_dec_count(new.inode); inode_set_ctime_current(new.inode); } - old.dir->i_mtime = inode_set_ctime_current(old.dir); + inode_set_mtime_to_ts(old.dir, inode_set_ctime_current(old.dir)); ext4_update_dx_flag(old.dir); if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); diff --git a/fs/ext4/super.c b/fs/ext4/super.c 
index 2346ef071b242..71ced0ada9a2e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -7180,7 +7180,7 @@ static int ext4_quota_off(struct super_block *sb, int type) } EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL); inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); err = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); out_unlock: diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f40785bc4e554..df5ab1a75fc48 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -356,7 +356,7 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode) { - return ((u64) inode_get_ctime(ea_inode).tv_sec << 32) | + return ((u64) inode_get_ctime_sec(ea_inode) << 32) | (u32) inode_peek_iversion_raw(ea_inode); } @@ -368,12 +368,12 @@ static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count) static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode) { - return (u32)ea_inode->i_atime.tv_sec; + return (u32) inode_get_atime_sec(ea_inode); } static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash) { - ea_inode->i_atime.tv_sec = hash; + inode_set_atime(ea_inode, hash, 0); } /* @@ -418,7 +418,7 @@ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) return ret; } -#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec) +#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode_get_mtime_sec(inode))) static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, u32 ea_inode_hash, struct inode **ea_inode) From 03c39c700227723bb0fe653a2eedd01366271901 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 19 Sep 2024 12:07:40 -0400 Subject: [PATCH 017/216] ext4: partial zero eof block on unaligned inode size extension [ Upstream commit 
c7fc0366c65628fd69bfc310affec4918199aae2 ] Using mapped writes, it's technically possible to expose stale post-eof data on a truncate up operation. Consider the following example: $ xfs_io -fc "pwrite 0 2k" -c "mmap 0 4k" -c "mwrite 2k 2k" \ -c "truncate 8k" -c "pread -v 2k 16" ... 00000800: 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 XXXXXXXXXXXXXXXX ... This shows that the post-eof data written via mwrite lands within EOF after a truncate up. While this is deliberate of the test case, behavior is somewhat unpredictable because writeback does post-eof zeroing, and writeback can occur at any time in the background. For example, an fsync inserted between the mwrite and truncate causes the subsequent read to instead return zeroes. This basically means that there is a race window in this situation between any subsequent extending operation and writeback that dictates whether post-eof data is exposed to the file or zeroed. To prevent this problem, perform partial block zeroing as part of the various inode size extending operations that are susceptible to it. For truncate extension, zero around the original eof similar to how truncate down does partial zeroing of the new eof. For extension via writes and fallocate related operations, zero the newly exposed range of the file to cover any partial zeroing that must occur at the original and new eof blocks. 
Signed-off-by: Brian Foster Link: https://patch.msgid.link/20240919160741.208162-2-bfoster@redhat.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin (cherry picked from commit 93011887013dbaa0e3a0285176ca89be153df651) --- fs/ext4/extents.c | 7 ++++++- fs/ext4/inode.c | 51 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index efa7e274ac4b3..aef914eb842e4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4475,7 +4475,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, int depth = 0; struct ext4_map_blocks map; unsigned int credits; - loff_t epos; + loff_t epos, old_size = i_size_read(inode); BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)); map.m_lblk = offset; @@ -4534,6 +4534,11 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, if (ext4_update_inode_size(inode, epos) & 0x1) inode_set_mtime_to_ts(inode, inode_get_ctime(inode)); + if (epos > old_size) { + pagecache_isize_extended(inode, old_size, epos); + ext4_zero_partial_blocks(handle, inode, + old_size, epos - old_size); + } } ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 15e0dd22658c2..7c799b26aca4b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1328,8 +1328,10 @@ static int ext4_write_end(struct file *file, folio_unlock(folio); folio_put(folio); - if (old_size < pos && !verity) + if (old_size < pos && !verity) { pagecache_isize_extended(inode, old_size, pos); + ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size); + } /* * Don't mark the inode dirty under folio lock. First, it unnecessarily * makes the holding time of folio lock longer. 
Second, it forces lock @@ -1445,8 +1447,10 @@ static int ext4_journalled_write_end(struct file *file, folio_unlock(folio); folio_put(folio); - if (old_size < pos && !verity) + if (old_size < pos && !verity) { pagecache_isize_extended(inode, old_size, pos); + ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size); + } if (size_changed) { ret2 = ext4_mark_inode_dirty(handle, inode); @@ -2971,7 +2975,8 @@ static int ext4_da_do_write_end(struct address_space *mapping, struct inode *inode = mapping->host; loff_t old_size = inode->i_size; bool disksize_changed = false; - loff_t new_i_size; + loff_t new_i_size, zero_len = 0; + handle_t *handle; if (unlikely(!folio_buffers(folio))) { folio_unlock(folio); @@ -3015,18 +3020,21 @@ static int ext4_da_do_write_end(struct address_space *mapping, folio_unlock(folio); folio_put(folio); - if (old_size < pos) + if (pos > old_size) { pagecache_isize_extended(inode, old_size, pos); + zero_len = pos - old_size; + } - if (disksize_changed) { - handle_t *handle; + if (!disksize_changed && !zero_len) + return copied; - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ext4_mark_inode_dirty(handle, inode); - ext4_journal_stop(handle); - } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (zero_len) + ext4_zero_partial_blocks(handle, inode, old_size, zero_len); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); return copied; } @@ -5437,6 +5445,14 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, } if (attr->ia_size != inode->i_size) { + /* attach jbd2 jinode for EOF folio tail zeroing */ + if (attr->ia_size & (inode->i_sb->s_blocksize - 1) || + oldsize & (inode->i_sb->s_blocksize - 1)) { + error = ext4_inode_attach_jinode(inode); + if (error) + goto err_out; + } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); if (IS_ERR(handle)) { error = PTR_ERR(handle); @@ -5447,12 
+5463,17 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, orphan = 1; } /* - * Update c/mtime on truncate up, ext4_truncate() will - * update c/mtime in shrink case below + * Update c/mtime and tail zero the EOF folio on + * truncate up. ext4_truncate() handles the shrink case + * below. */ - if (!shrink) + if (!shrink) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + if (oldsize & (inode->i_sb->s_blocksize - 1)) + ext4_block_truncate_page(handle, + inode->i_mapping, oldsize); + } if (shrink) ext4_fc_track_range(handle, inode, From 4dccf7668fbf5161e66c9d34fa4970413dd0e34e Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:45 -0400 Subject: [PATCH 018/216] crypto: ecdsa - Convert byte arrays with key coordinates to digits [ Upstream commit d67c96fb97b5811e15c881d5cb72e293faa5f8e1 ] For NIST P192/256/384 the public key's x and y parameters could be copied directly from a given array since both parameters filled 'ndigits' of digits (a 'digit' is a u64). For support of NIST P521 the key parameters need to have leading zeros prepended to the most significant digit since only 2 bytes of the most significant digit are provided. Therefore, implement ecc_digits_from_bytes to convert a byte array into an array of digits and use this function in ecdsa_set_pub_key where an input byte array needs to be converted into digits. 
Suggested-by: Lukas Wunner Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu Stable-dep-of: 3b0565c70350 ("crypto: ecdsa - Avoid signed integer overflow on signature decoding") Signed-off-by: Sasha Levin (cherry picked from commit e7fcd5d696c4d020b4218f5015631596ab382475) --- crypto/ecdsa.c | 14 +++++++++----- include/crypto/internal/ecc.h | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 3f9ec273a121f..e819d0983bd39 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -222,9 +222,8 @@ static int ecdsa_ecc_ctx_reset(struct ecc_ctx *ctx) static int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); + unsigned int digitlen, ndigits; const unsigned char *d = key; - const u64 *digits = (const u64 *)&d[1]; - unsigned int ndigits; int ret; ret = ecdsa_ecc_ctx_reset(ctx); @@ -238,12 +237,17 @@ static int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsig return -EINVAL; keylen--; - ndigits = (keylen >> 1) / sizeof(u64); + digitlen = keylen >> 1; + + ndigits = DIV_ROUND_UP(digitlen, sizeof(u64)); if (ndigits != ctx->curve->g.ndigits) return -EINVAL; - ecc_swap_digits(digits, ctx->pub_key.x, ndigits); - ecc_swap_digits(&digits[ndigits], ctx->pub_key.y, ndigits); + d++; + + ecc_digits_from_bytes(d, digitlen, ctx->pub_key.x, ndigits); + ecc_digits_from_bytes(&d[digitlen], digitlen, ctx->pub_key.y, ndigits); + ret = ecc_is_pubkey_valid_full(ctx->curve, &ctx->pub_key); ctx->pub_key_set = ret == 0; diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 4f6c1a68882fa..ab722a8986b76 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -56,6 +56,27 @@ static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigit out[i] = get_unaligned_be64(&src[ndigits - 1 - i]); } +/** + * 
ecc_digits_from_bytes() - Create ndigits-sized digits array from byte array + * @in: Input byte array + * @nbytes Size of input byte array + * @out Output digits array + * @ndigits: Number of digits to create from byte array + */ +static inline void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, + u64 *out, unsigned int ndigits) +{ + unsigned int o = nbytes & 7; + __be64 msd = 0; + + if (o) { + memcpy((u8 *)&msd + sizeof(msd) - o, in, o); + out[--ndigits] = be64_to_cpu(msd); + in += o; + } + ecc_swap_digits(in, out, ndigits); +} + /** * ecc_is_key_valid() - Validate a given ECDH private key * From 8dd91b54169ff49dce92b547c136cf92e9750ae9 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 4 Apr 2024 10:18:53 -0400 Subject: [PATCH 019/216] crypto: ecdsa - Rename keylen to bufsize where necessary [ Upstream commit 703ca5cda1ea04735e48882a7cccff97d57656c3 ] In cases where 'keylen' was referring to the size of the buffer used by a curve's digits, it does not reflect the purpose of the variable anymore once NIST P521 is used. What it refers to then is the size of the buffer, which may be a few bytes larger than the size of a coordinate of a key. Therefore, rename keylen to bufsize where appropriate.
Tested-by: Lukas Wunner Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu Stable-dep-of: 3b0565c70350 ("crypto: ecdsa - Avoid signed integer overflow on signature decoding") Signed-off-by: Sasha Levin (cherry picked from commit 1afc7acbedb8dcae865d5b650c4a12aa4a48bd07) --- crypto/ecdsa.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index e819d0983bd39..142bed98fa978 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -35,8 +35,8 @@ struct ecdsa_signature_ctx { static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, const void *value, size_t vlen, unsigned int ndigits) { - size_t keylen = ndigits * sizeof(u64); - ssize_t diff = vlen - keylen; + size_t bufsize = ndigits * sizeof(u64); + ssize_t diff = vlen - bufsize; const char *d = value; u8 rs[ECC_MAX_BYTES]; @@ -58,7 +58,7 @@ static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, if (diff) return -EINVAL; } - if (-diff >= keylen) + if (-diff >= bufsize) return -EINVAL; if (diff) { @@ -138,7 +138,7 @@ static int ecdsa_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); - size_t keylen = ctx->curve->g.ndigits * sizeof(u64); + size_t bufsize = ctx->curve->g.ndigits * sizeof(u64); struct ecdsa_signature_ctx sig_ctx = { .curve = ctx->curve, }; @@ -165,14 +165,14 @@ static int ecdsa_verify(struct akcipher_request *req) goto error; /* if the hash is shorter then we will add leading zeros to fit to ndigits */ - diff = keylen - req->dst_len; + diff = bufsize - req->dst_len; if (diff >= 0) { if (diff) memset(rawhash, 0, diff); memcpy(&rawhash[diff], buffer + req->src_len, req->dst_len); } else if (diff < 0) { /* given hash is longer, we take the left-most bytes */ - memcpy(&rawhash, buffer + req->src_len, keylen); + memcpy(&rawhash, buffer + req->src_len, bufsize); } ecc_swap_digits((u64 
*)rawhash, hash, ctx->curve->g.ndigits); From 151447f1d89ce68b7fe93ef5f8dbf943d93d67bf Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Wed, 29 May 2024 19:08:27 -0400 Subject: [PATCH 020/216] crypto: ecdsa - Use ecc_digits_from_bytes to convert signature [ Upstream commit 546ce0bdc91afd9f5c4c67d9fc4733e0fc7086d1 ] Since ecc_digits_from_bytes will provide zeros when an insufficient number of bytes are passed in the input byte array, use it to convert the r and s components of the signature to digits directly from the input byte array. This avoids going through an intermediate byte array that has the first few bytes filled with zeros. Signed-off-by: Stefan Berger Reviewed-by: Jarkko Sakkinen Signed-off-by: Herbert Xu Stable-dep-of: 3b0565c70350 ("crypto: ecdsa - Avoid signed integer overflow on signature decoding") Signed-off-by: Sasha Levin (cherry picked from commit ec64889179410e67d1b2aa7b047cafaa2d0c3f43) --- crypto/ecdsa.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 142bed98fa978..28441311af364 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -38,7 +38,6 @@ static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, size_t bufsize = ndigits * sizeof(u64); ssize_t diff = vlen - bufsize; const char *d = value; - u8 rs[ECC_MAX_BYTES]; if (!value || !vlen) return -EINVAL; @@ -46,7 +45,7 @@ static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, /* diff = 0: 'value' has exacly the right size * diff > 0: 'value' has too many bytes; one leading zero is allowed that * makes the value a positive integer; error on more - * diff < 0: 'value' is missing leading zeros, which we add + * diff < 0: 'value' is missing leading zeros */ if (diff > 0) { /* skip over leading zeros that make 'value' a positive int */ @@ -61,14 +60,7 @@ static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, if (-diff >= bufsize) return -EINVAL; - if (diff) { 
- /* leading zeros not given in 'value' */ - memset(rs, 0, -diff); - } - - memcpy(&rs[-diff], d, vlen); - - ecc_swap_digits((u64 *)rs, dest, ndigits); + ecc_digits_from_bytes(d, vlen, dest, ndigits); return 0; } From 897387994b37b84e3756bcf1f9a6df657aede54d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:24 +0200 Subject: [PATCH 021/216] crypto: ecdsa - Avoid signed integer overflow on signature decoding [ Upstream commit 3b0565c703503f832d6cd7ba805aafa3b330cb9d ] When extracting a signature component r or s from an ASN.1-encoded integer, ecdsa_get_signature_rs() subtracts the expected length "bufsize" from the ASN.1 length "vlen" (both of unsigned type size_t) and stores the result in "diff" (of signed type ssize_t). This results in a signed integer overflow if vlen > SSIZE_MAX + bufsize. The kernel is compiled with -fno-strict-overflow, which implies -fwrapv, meaning signed integer overflow is not undefined behavior. And the function does check for overflow: if (-diff >= bufsize) return -EINVAL; So the code is fine in principle but not very obvious. In the future it might trigger a false-positive with CONFIG_UBSAN_SIGNED_WRAP=y. Avoid by comparing the two unsigned variables directly and erroring out if "vlen" is too large. 
Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin (cherry picked from commit 4b6beff3c073b3bd0dcb4cb16822408fc51e5df1) --- crypto/ecdsa.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 28441311af364..da04df3c8ecf4 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -36,29 +36,24 @@ static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, const void *value, size_t vlen, unsigned int ndigits) { size_t bufsize = ndigits * sizeof(u64); - ssize_t diff = vlen - bufsize; const char *d = value; - if (!value || !vlen) + if (!value || !vlen || vlen > bufsize + 1) return -EINVAL; - /* diff = 0: 'value' has exacly the right size - * diff > 0: 'value' has too many bytes; one leading zero is allowed that - * makes the value a positive integer; error on more - * diff < 0: 'value' is missing leading zeros + /* + * vlen may be 1 byte larger than bufsize due to a leading zero byte + * (necessary if the most significant bit of the integer is set). */ - if (diff > 0) { + if (vlen > bufsize) { /* skip over leading zeros that make 'value' a positive int */ if (*d == 0) { vlen -= 1; - diff--; d++; - } - if (diff) + } else { return -EINVAL; + } } - if (-diff >= bufsize) - return -EINVAL; ecc_digits_from_bytes(d, vlen, dest, ndigits); From 4627697b865793ce50485c75b6ff5c780c71fad9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 17 Sep 2023 13:22:17 +0200 Subject: [PATCH 022/216] cleanup: Add conditional guard support [ Upstream commit e4ab322fbaaaf84b23d6cb0e3317a7f68baf36dc ] Adds: - DEFINE_GUARD_COND() / DEFINE_LOCK_GUARD_1_COND() to extend existing guards with conditional lock primitives, eg. mutex_trylock(), mutex_lock_interruptible(). nb. both primitives allow NULL 'locks', which cause the lock to fail (obviously). 
- extends scoped_guard() to not take the body when the conditional guard 'fails'. eg. scoped_guard (mutex_intr, &task->signal_cred_guard_mutex) { ... } will only execute the body when the mutex is held. - provides scoped_cond_guard(name, fail, args...); which extends scoped_guard() to do fail when the lock-acquire fails. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231102110706.460851167%40infradead.org Stable-dep-of: fcc22ac5baf0 ("cleanup: Adjust scoped_guard() macros to avoid potential warning") Signed-off-by: Sasha Levin (cherry picked from commit 873df38bdf42c076842b0e55e2f410e8a4662347) --- include/linux/cleanup.h | 52 +++++++++++++++++++++++++++++++++++++--- include/linux/mutex.h | 3 ++- include/linux/rwsem.h | 8 +++---- include/linux/spinlock.h | 15 ++++++++++++ 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 53f1a7a932b08..6d7bfa899df02 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -92,25 +92,55 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * trivial wrapper around DEFINE_CLASS() above specifically * for locks. * + * DEFINE_GUARD_COND(name, ext, condlock) + * wrapper around EXTEND_CLASS above to add conditional lock + * variants to a base class, eg. mutex_trylock() or + * mutex_lock_interruptible(). + * * guard(name): - * an anonymous instance of the (guard) class + * an anonymous instance of the (guard) class, not recommended for + * conditional locks. * * scoped_guard (name, args...) { }: * similar to CLASS(name, scope)(args), except the variable (with the * explicit name 'scope') is declard in a for-loop such that its scope is * bound to the next (compound) statement. * + * for conditional locks the loop body is skipped when the lock is not + * acquired. + * + * scoped_cond_guard (name, fail, args...) { }: + * similar to scoped_guard(), except it does fail when the lock + * acquire fails.
+ * */ #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return *_T; } + +#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + class_##_name##_t _T) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) +#define __guard_ptr(_name) class_##_name##_lock_ptr + #define scoped_guard(_name, args...) \ for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) + *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) + +#define scoped_cond_guard(_name, _fail, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) \ + if (!__guard_ptr(_name)(&scope)) _fail; \ + else /* * Additional helper macros for generating lock guards with types, either for @@ -119,6 +149,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) 
+ * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock) * * will result in the following type: * @@ -140,6 +171,11 @@ typedef struct { \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ if (_T->lock) { _unlock; } \ +} \ + \ +static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ +{ \ + return _T->lock; \ } @@ -168,4 +204,14 @@ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) +#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ + if (_T->lock && !(_condlock)) _T->lock = NULL; \ + _t; }), \ + typeof_member(class_##_name##_t, lock) l) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } + + #endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 5b5630e58407a..e1c323c7d75ba 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -248,6 +248,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) -DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) +DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 1dd530ce8b45b..9c29689ff505e 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -203,11 +203,11 @@ extern void up_read(struct rw_semaphore *sem); extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) -DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) - -DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) 
-DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) +DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) +DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) /* * downgrade write lock to read lock diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 31d3d747a9db7..ceb56b39c70f7 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -507,6 +507,8 @@ DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, raw_spin_lock(_T->lock), raw_spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), raw_spin_unlock(_T->lock)) @@ -515,23 +517,36 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, raw_spin_lock_irq(_T->lock), raw_spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, + raw_spin_trylock_irqsave(_T->lock, _T->flags)) + DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, spin_lock(_T->lock), spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, + spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, + spin_trylock_irqsave(_T->lock, _T->flags)) + #undef __LINUX_INSIDE_SPINLOCK_H 
#endif /* __LINUX_SPINLOCK_H */ From 2c5665dee9364633971169cd89450d2dfff8abd5 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 18 Oct 2024 13:38:14 +0200 Subject: [PATCH 023/216] cleanup: Adjust scoped_guard() macros to avoid potential warning [ Upstream commit fcc22ac5baf06dd17193de44b60dbceea6461983 ] Change scoped_guard() and scoped_cond_guard() macros to make reasoning about them easier for static analysis tools (smatch, compiler diagnostics), especially to enable them to tell if the given usage of scoped_guard() is with a conditional lock class (interruptible-locks, try-locks) or not (like simple mutex_lock()). Add compile-time error if scoped_cond_guard() is used for non-conditional lock class. Beyond easier tooling and a little shrink reported by bloat-o-meter this patch enables developer to write code like: int foo(struct my_drv *adapter) { scoped_guard(spinlock, &adapter->some_spinlock) return adapter->spinlock_protected_var; } Current scoped_guard() implementation does not support that, due to compiler complaining: error: control reaches end of non-void function [-Werror=return-type] Technical stuff about the change: scoped_guard() macro uses common idiom of using "for" statement to declare a scoped variable. Unfortunately, current logic is too hard for compiler diagnostics to be sure that there is exactly one loop step; fix that. To make any loop so trivial that there is no above warning, it must not depend on any non-const variable to tell if there are more steps. There is no obvious solution for that in C, but one could use the compound statement expression with "goto" jumping past the "loop", effectively leaving only the subscope part of the loop semantics. 
More impl details: one more level of macro indirection is now needed to avoid duplicating label names; I didn't spot any other place that is using the "for (...; goto label) if (0) label: break;" idiom, so it's not packed for reuse beyond scoped_guard() family, which makes the actual macro code cleaner. There was also a need to introduce const true/false variable per lock class; it is used to aid compiler diagnostics reasoning about "exactly 1 step" loops (note that converting that to a function would undo the whole benefit). Big thanks to Andy Shevchenko for help on this patch, both internal and public, ranging from whitespace/formatting, through commit message clarifications, general improvements, ending with presenting alternative approaches - all despite not even liking the idea. Big thanks to Dmitry Torokhov for the idea of compile-time check for scoped_cond_guard() (to use it only with conditional locks), and general improvements for the patch. Big thanks to David Lechner for the idea to also cover scoped_cond_guard(). Signed-off-by: Przemek Kitszel Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Torokhov Link: https://lkml.kernel.org/r/20241018113823.171256-1-przemyslaw.kitszel@intel.com Signed-off-by: Sasha Levin (cherry picked from commit 8fa6f680b5aad8f0aceacb1404fc8464269167ed) --- include/linux/cleanup.h | 52 +++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 6d7bfa899df02..f0c6d1d45e679 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -113,14 +113,20 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * similar to scoped_guard(), except it does fail when the lock * acquire fails. * + * Only for conditional locks.
*/ +#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ +static __maybe_unused const bool class_##_name##_is_conditional = _is_cond + #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ { return *_T; } #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ class_##_name##_t _T) \ @@ -131,17 +137,40 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __is_cond_ptr(_name) class_##_name##_is_conditional -#define scoped_guard(_name, args...) \ - for (CLASS(_name, scope)(args), \ - *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) - -#define scoped_cond_guard(_name, _fail, args...) \ - for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) \ - if (!__guard_ptr(_name)(&scope)) _fail; \ - else - +/* + * Helper macro for scoped_guard(). + * + * Note that the "!__is_cond_ptr(_name)" part of the condition ensures that + * compiler would be sure that for the unconditional locks the body of the + * loop (caller-provided code glued to the else clause) could not be skipped. + * It is needed because the other part - "__guard_ptr(_name)(&scope)" - is too + * hard to deduce (even if could be proven true for unconditional locks). + */ +#define __scoped_guard(_name, _label, args...) \ + for (CLASS(_name, scope)(args); \ + __guard_ptr(_name)(&scope) || !__is_cond_ptr(_name); \ + ({ goto _label; })) \ + if (0) { \ +_label: \ + break; \ + } else + +#define scoped_guard(_name, args...) 
\ + __scoped_guard(_name, __UNIQUE_ID(label), args) + +#define __scoped_cond_guard(_name, _fail, _label, args...) \ + for (CLASS(_name, scope)(args); true; ({ goto _label; })) \ + if (!__guard_ptr(_name)(&scope)) { \ + BUILD_BUG_ON(!__is_cond_ptr(_name)); \ + _fail; \ +_label: \ + break; \ + } else + +#define scoped_cond_guard(_name, _fail, args...) \ + __scoped_cond_guard(_name, _fail, __UNIQUE_ID(label), args) /* * Additional helper macros for generating lock guards with types, either for * locks that don't have a native type (eg. RCU, preempt) or those that need a @@ -197,14 +226,17 @@ static inline class_##_name##_t class_##_name##_constructor(void) \ } #define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \ +__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) #define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ +__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) #define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ if (_T->lock && !(_condlock)) _T->lock = NULL; \ From fa4b38292ccb682d3faf5811e43a12e0bb7c38aa Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 15 Jan 2023 22:52:10 +0200 Subject: [PATCH 024/216] media: uvcvideo: Force UVC version to 1.0a for 0408:4035 [ Upstream commit c397e8c45d911443b4ab60084fb723edf2a5b604 ] The Quanta ACER HD User Facing camera reports a UVC 1.50 version, but implements UVC 1.0a as shown by the UVC probe control being 26 bytes long. Force the UVC version for that device. 
Reported-by: Giuliano Lotta Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2000947 Link: https://lore.kernel.org/r/20230115205210.20077-1-laurent.pinchart@ideasonboard.com Tested-by: Giuliano Lotta Signed-off-by: Laurent Pinchart Stable-dep-of: c9df99302fff ("media: uvcvideo: Force UVC version to 1.0a for 0408:4033") Signed-off-by: Sasha Levin (cherry picked from commit 9471b8f80526ea1e51413afebc761b345ec0904d) --- drivers/media/usb/uvc/uvc_driver.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 5a3e933df6335..adf1abce53331 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -2519,6 +2519,17 @@ static const struct usb_device_id uvc_ids[] = { .bInterfaceSubClass = 1, .bInterfaceProtocol = UVC_PC_PROTOCOL_15, .driver_info = (kernel_ulong_t)&uvc_ctrl_power_line_limited }, + /* Quanta ACER HD User Facing */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x0408, + .idProduct = 0x4035, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = UVC_PC_PROTOCOL_15, + .driver_info = (kernel_ulong_t)&(const struct uvc_device_info){ + .uvc_version = 0x010a, + } }, /* LogiLink Wireless Webcam */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, From 582ab7b24caff1859509c750a0d2c59b8cec0bb8 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 24 Sep 2024 13:33:29 +0000 Subject: [PATCH 025/216] media: uvcvideo: Force UVC version to 1.0a for 0408:4033 [ Upstream commit c9df99302fff53b6007666136b9f43fbac7ee3d8 ] The Quanta ACER HD User Facing camera reports a UVC 1.50 version, but implements UVC 1.0a as shown by the UVC probe control being 26 bytes long. Force the UVC version for that device. 
Reported-by: Giuliano Lotta Closes: https://lore.kernel.org/linux-media/fce4f906-d69b-417d-9f13-bf69fe5c81e3@koyu.space/ Signed-off-by: Ricardo Ribalda Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20240924-uvc-quanta-v1-1-2de023863767@chromium.org Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin (cherry picked from commit ed01e57a81694ea2b44e6cc98ac7fd2272037c9a) --- drivers/media/usb/uvc/uvc_driver.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index adf1abce53331..1b05890f99f4f 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -2520,6 +2520,17 @@ static const struct usb_device_id uvc_ids[] = { .bInterfaceProtocol = UVC_PC_PROTOCOL_15, .driver_info = (kernel_ulong_t)&uvc_ctrl_power_line_limited }, /* Quanta ACER HD User Facing */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x0408, + .idProduct = 0x4033, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = UVC_PC_PROTOCOL_15, + .driver_info = (kernel_ulong_t)&(const struct uvc_device_info){ + .uvc_version = 0x010a, + } }, + /* Quanta ACER HD User Facing */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0408, From 8c1880a76bd7b09d5983bef4062dc8b7ba5da8ca Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 22 Aug 2024 09:42:54 +0800 Subject: [PATCH 026/216] wifi: mac80211: export ieee80211_purge_tx_queue() for drivers [ Upstream commit 53bc1b73b67836ac9867f93dee7a443986b4a94f ] Drivers need to purge TX SKB when stopping. Using skb_queue_purge() can't report TX status to mac80211, causing ieee80211_free_ack_frame() warns "Have pending ack frames!". Export ieee80211_purge_tx_queue() for drivers to not have to reimplement it. 
Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240822014255.10211-1-pkshih@realtek.com Signed-off-by: Johannes Berg Stable-dep-of: 3e5e4a801aaf ("wifi: rtw88: use ieee80211_purge_tx_queue() to purge TX skb") Signed-off-by: Sasha Levin (cherry picked from commit 24b5898a8c73126c7ada3f1e5235a33af5b67460) --- include/net/mac80211.h | 13 +++++++++++++ net/mac80211/ieee80211_i.h | 2 -- net/mac80211/status.c | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d9f73db30db8a..cffaee1b91033 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3045,6 +3045,19 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, */ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_purge_tx_queue - purge TX skb queue + * @hw: the hardware + * @skbs: the skbs + * + * Free a set of transmit skbs. Use this function when device is going to stop + * but some transmit skbs without TX status are still queued. + * This function does not take the list lock and the caller must hold the + * relevant locks to use it. 
+ */ +void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, + struct sk_buff_head *skbs); + /** * DOC: Hardware crypto acceleration * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index daea061d0fc13..04c876d78d3bf 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2057,8 +2057,6 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, u32 info_flags, u32 ctrl_flags, u64 *cookie); -void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, - struct sk_buff_head *skbs); struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index f7e5524667310..17774483428f1 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -1270,3 +1270,4 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, while ((skb = __skb_dequeue(skbs))) ieee80211_free_txskb(hw, skb); } +EXPORT_SYMBOL(ieee80211_purge_tx_queue); From 57d6b24a3389193117c89fadee51d9594f578cb5 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 22 Aug 2024 09:42:55 +0800 Subject: [PATCH 027/216] wifi: rtw88: use ieee80211_purge_tx_queue() to purge TX skb [ Upstream commit 3e5e4a801aaf4283390cc34959c6c48f910ca5ea ] When removing kernel modules by: rmmod rtw88_8723cs rtw88_8703b rtw88_8723x rtw88_sdio rtw88_core Driver uses skb_queue_purge() to purge TX skb, but not report tx status causing "Have pending ack frames!" warning. Use ieee80211_purge_tx_queue() to correct this. Since ieee80211_purge_tx_queue() doesn't take locks, to prevent racing between TX work and purge TX queue, flush and destroy TX work in advance. wlan0: deauthenticating from aa:f5:fd:60:4c:a8 by local choice (Reason: 3=DEAUTH_LEAVING) ------------[ cut here ]------------ Have pending ack frames! 
WARNING: CPU: 3 PID: 9232 at net/mac80211/main.c:1691 ieee80211_free_ack_frame+0x5c/0x90 [mac80211] CPU: 3 PID: 9232 Comm: rmmod Tainted: G C 6.10.1-200.fc40.aarch64 #1 Hardware name: pine64 Pine64 PinePhone Braveheart (1.1)/Pine64 PinePhone Braveheart (1.1), BIOS 2024.01 01/01/2024 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ieee80211_free_ack_frame+0x5c/0x90 [mac80211] lr : ieee80211_free_ack_frame+0x5c/0x90 [mac80211] sp : ffff80008c1b37b0 x29: ffff80008c1b37b0 x28: ffff000003be8000 x27: 0000000000000000 x26: 0000000000000000 x25: ffff000003dc14b8 x24: ffff80008c1b37d0 x23: ffff000000ff9f80 x22: 0000000000000000 x21: 000000007fffffff x20: ffff80007c7e93d8 x19: ffff00006e66f400 x18: 0000000000000000 x17: ffff7ffffd2b3000 x16: ffff800083fc0000 x15: 0000000000000000 x14: 0000000000000000 x13: 2173656d61726620 x12: 6b636120676e6964 x11: 0000000000000000 x10: 000000000000005d x9 : ffff8000802af2b0 x8 : ffff80008c1b3430 x7 : 0000000000000001 x6 : 0000000000000001 x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000003be8000 Call trace: ieee80211_free_ack_frame+0x5c/0x90 [mac80211] idr_for_each+0x74/0x110 ieee80211_free_hw+0x44/0xe8 [mac80211] rtw_sdio_remove+0x9c/0xc0 [rtw88_sdio] sdio_bus_remove+0x44/0x180 device_remove+0x54/0x90 device_release_driver_internal+0x1d4/0x238 driver_detach+0x54/0xc0 bus_remove_driver+0x78/0x108 driver_unregister+0x38/0x78 sdio_unregister_driver+0x2c/0x40 rtw_8723cs_driver_exit+0x18/0x1000 [rtw88_8723cs] __do_sys_delete_module.isra.0+0x190/0x338 __arm64_sys_delete_module+0x1c/0x30 invoke_syscall+0x74/0x100 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x3c/0x158 el0t_64_sync_handler+0x120/0x138 el0t_64_sync+0x194/0x198 ---[ end trace 0000000000000000 ]--- Reported-by: Peter Robinson Closes: https://lore.kernel.org/linux-wireless/CALeDE9OAa56KMzgknaCD3quOgYuEHFx9_hcT=OFgmMAb+8MPyA@mail.gmail.com/ Tested-by: Ping-Ke Shih # 
8723DU Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240822014255.10211-2-pkshih@realtek.com Signed-off-by: Sasha Levin (cherry picked from commit 3d94c4b21966b49c3e26ceeefacaa11ff7ee6d68) --- drivers/net/wireless/realtek/rtw88/sdio.c | 6 +++--- drivers/net/wireless/realtek/rtw88/usb.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c index 0cae5746f540f..5bd1ee81210d1 100644 --- a/drivers/net/wireless/realtek/rtw88/sdio.c +++ b/drivers/net/wireless/realtek/rtw88/sdio.c @@ -1295,12 +1295,12 @@ static void rtw_sdio_deinit_tx(struct rtw_dev *rtwdev) struct rtw_sdio *rtwsdio = (struct rtw_sdio *)rtwdev->priv; int i; - for (i = 0; i < RTK_MAX_TX_QUEUE_NUM; i++) - skb_queue_purge(&rtwsdio->tx_queue[i]); - flush_workqueue(rtwsdio->txwq); destroy_workqueue(rtwsdio->txwq); kfree(rtwsdio->tx_handler_data); + + for (i = 0; i < RTK_MAX_TX_QUEUE_NUM; i++) + ieee80211_purge_tx_queue(rtwdev->hw, &rtwsdio->tx_queue[i]); } int rtw_sdio_probe(struct sdio_func *sdio_func, diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index 04a64afcbf8a2..8f1d653282b7e 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -416,10 +416,11 @@ static void rtw_usb_tx_handler(struct work_struct *work) static void rtw_usb_tx_queue_purge(struct rtw_usb *rtwusb) { + struct rtw_dev *rtwdev = rtwusb->rtwdev; int i; for (i = 0; i < ARRAY_SIZE(rtwusb->tx_queue); i++) - skb_queue_purge(&rtwusb->tx_queue[i]); + ieee80211_purge_tx_queue(rtwdev->hw, &rtwusb->tx_queue[i]); } static void rtw_usb_write_port_complete(struct urb *urb) @@ -801,9 +802,9 @@ static void rtw_usb_deinit_tx(struct rtw_dev *rtwdev) { struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); - rtw_usb_tx_queue_purge(rtwusb); flush_workqueue(rtwusb->txwq); destroy_workqueue(rtwusb->txwq); + rtw_usb_tx_queue_purge(rtwusb); } static int 
rtw_usb_intf_init(struct rtw_dev *rtwdev, From e7e7be8896243e384dd4a9c4acc86aee8b5d50db Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 21 Nov 2023 05:28:11 +0530 Subject: [PATCH 028/216] wifi: ath12k: Optimize the mac80211 hw data access [ Upstream commit 842addae02089fce4731be1c8d7d539449d4d009 ] Currently mac80211 hw data is accessed by converting the hw to radio (ar) structure and then radio to hw structure, which is not necessary in some places where mac80211 hw data is already present. So in such places, avoid the conversion and directly access the mac80211 hw data. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231120235812.2602198-2-quic_periyasa@quicinc.com Stable-dep-of: 8fac3266c68a ("wifi: ath12k: fix atomic calls in ath12k_mac_op_set_bitrate_mask()") Signed-off-by: Sasha Levin (cherry picked from commit 4eceef729c84e52d80f48be2362b5b666c9bf2c4) --- drivers/net/wireless/ath/ath12k/mac.c | 14 +++++++------- drivers/net/wireless/ath/ath12k/reg.c | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 5098764d35de5..f324549aeb44f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4945,7 +4945,7 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, if (ret) { ath12k_warn(ar->ab, "failed to queue management frame %d\n", ret); - ieee80211_free_txskb(ar->hw, skb); + ieee80211_free_txskb(hw, skb); } return; } @@ -4953,7 +4953,7 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, ret = ath12k_dp_tx(ar, arvif, skb); if (ret) { ath12k_warn(ar->ab, "failed to transmit frame %d\n", ret); - ieee80211_free_txskb(ar->hw, skb); + ieee80211_free_txskb(hw, skb); } } @@ -5496,7 +5496,7 @@ static int ath12k_mac_op_add_interface(struct ieee80211_hw *hw,
goto err_peer_del; param_id = WMI_VDEV_PARAM_RTS_THRESHOLD; - param_value = ar->hw->wiphy->rts_threshold; + param_value = hw->wiphy->rts_threshold; ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, param_id, param_value); if (ret) { @@ -6676,7 +6676,7 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, arvif->vdev_id, ret); return ret; } - ieee80211_iterate_stations_atomic(ar->hw, + ieee80211_iterate_stations_atomic(hw, ath12k_mac_disable_peer_fixed_rate, arvif); } else if (ath12k_mac_bitrate_mask_get_single_nss(ar, band, mask, @@ -6722,14 +6722,14 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, return -EINVAL; } - ieee80211_iterate_stations_atomic(ar->hw, + ieee80211_iterate_stations_atomic(hw, ath12k_mac_disable_peer_fixed_rate, arvif); mutex_lock(&ar->conf_mutex); arvif->bitrate_mask = *mask; - ieee80211_iterate_stations_atomic(ar->hw, + ieee80211_iterate_stations_atomic(hw, ath12k_mac_set_bitrate_mask_iter, arvif); @@ -6767,7 +6767,7 @@ ath12k_mac_op_reconfig_complete(struct ieee80211_hw *hw, ath12k_warn(ar->ab, "pdev %d successfully recovered\n", ar->pdev->pdev_id); ar->state = ATH12K_STATE_ON; - ieee80211_wake_queues(ar->hw); + ieee80211_wake_queues(hw); if (ab->is_reset) { recovery_count = atomic_inc_return(&ab->recovery_count); diff --git a/drivers/net/wireless/ath/ath12k/reg.c b/drivers/net/wireless/ath/ath12k/reg.c index 32bdefeccc245..837a3e1ec3a49 100644 --- a/drivers/net/wireless/ath/ath12k/reg.c +++ b/drivers/net/wireless/ath/ath12k/reg.c @@ -28,11 +28,11 @@ static const struct ieee80211_regdomain ath12k_world_regd = { } }; -static bool ath12k_regdom_changes(struct ath12k *ar, char *alpha2) +static bool ath12k_regdom_changes(struct ieee80211_hw *hw, char *alpha2) { const struct ieee80211_regdomain *regd; - regd = rcu_dereference_rtnl(ar->hw->wiphy->regd); + regd = rcu_dereference_rtnl(hw->wiphy->regd); /* This can happen during wiphy registration where the previous * user request is received before we update the regd received * 
from firmware. @@ -71,7 +71,7 @@ ath12k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) return; } - if (!ath12k_regdom_changes(ar, request->alpha2)) { + if (!ath12k_regdom_changes(hw, request->alpha2)) { ath12k_dbg(ar->ab, ATH12K_DBG_REG, "Country is already set\n"); return; } From d2a662793ef8e5127946d73a767ea4475addae59 Mon Sep 17 00:00:00 2001 From: Rory Little Date: Mon, 5 Aug 2024 17:40:23 -0700 Subject: [PATCH 029/216] wifi: mac80211: Add non-atomic station iterator [ Upstream commit 7c3b69eadea9e57c28bf914b0fd70f268f3682e1 ] Drivers may at times want to iterate their stations with a function which requires some non-atomic operations. ieee80211_iterate_stations_mtx() introduces an API to iterate stations while holding that wiphy's mutex. This allows the iterating function to do non-atomic operations safely. Signed-off-by: Rory Little Link: https://patch.msgid.link/20240806004024.2014080-2-rory@candelatech.com [unify internal list iteration functions] Signed-off-by: Johannes Berg Stable-dep-of: 8fac3266c68a ("wifi: ath12k: fix atomic calls in ath12k_mac_op_set_bitrate_mask()") Signed-off-by: Sasha Levin (cherry picked from commit dc60941085732397aab2d2d0e9167e6abd19f03f) --- include/net/mac80211.h | 18 ++++++++++++++++++ net/mac80211/util.c | 16 +++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cffaee1b91033..240d68a2c9059 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6097,6 +6097,24 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, void (*iterator)(void *data, struct ieee80211_sta *sta), void *data); + +/** + * ieee80211_iterate_stations_mtx - iterate stations + * + * This function iterates over all stations associated with a given + * hardware that are currently uploaded to the driver and calls the callback + * function for them. This version can only be used while holding the wiphy + * mutex. 
+ * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call + * @data: first argument of the iterator function + */ +void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data); + /** * ieee80211_queue_work - add work onto the mac80211 workqueue * diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d682c32821a11..cc3c46a820773 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -827,7 +827,8 @@ static void __iterate_stations(struct ieee80211_local *local, { struct sta_info *sta; - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry_rcu(sta, &local->sta_list, list, + lockdep_is_held(&local->hw.wiphy->mtx)) { if (!sta->uploaded) continue; @@ -848,6 +849,19 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, } EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_atomic); +void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data) +{ + struct ieee80211_local *local = hw_to_local(hw); + + lockdep_assert_wiphy(local->hw.wiphy); + + __iterate_stations(local, iterator, data); +} +EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_mtx); + struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); From fb8235092da084a6556568cf903f547f1036aa4f Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 7 Oct 2024 19:59:27 +0300 Subject: [PATCH 030/216] wifi: ath12k: fix atomic calls in ath12k_mac_op_set_bitrate_mask() [ Upstream commit 8fac3266c68a8e647240b8ac8d0b82f1821edf85 ] When I try to manually set bitrates: iw wlan0 set bitrates legacy-2.4 1 I get sleeping from invalid context error, see below. Fix that by switching to use recently introduced ieee80211_iterate_stations_mtx(). 
Do note that WCN6855 firmware is still crashing, I'm not sure if that firmware even supports bitrate WMI commands and should we consider disabling ath12k_mac_op_set_bitrate_mask() for WCN6855? But that's for another patch. BUG: sleeping function called from invalid context at drivers/net/wireless/ath/ath12k/wmi.c:420 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 2236, name: iw preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 3 locks held by iw/2236: #0: ffffffffabc6f1d8 (cb_lock){++++}-{3:3}, at: genl_rcv+0x14/0x40 #1: ffff888138410810 (&rdev->wiphy.mtx){+.+.}-{3:3}, at: nl80211_pre_doit+0x54d/0x800 [cfg80211] #2: ffffffffab2cfaa0 (rcu_read_lock){....}-{1:2}, at: ieee80211_iterate_stations_atomic+0x2f/0x200 [mac80211] CPU: 3 UID: 0 PID: 2236 Comm: iw Not tainted 6.11.0-rc7-wt-ath+ #1772 Hardware name: Intel(R) Client Systems NUC8i7HVK/NUC8i7HVB, BIOS HNKBLi70.86A.0067.2021.0528.1339 05/28/2021 Call Trace: dump_stack_lvl+0xa4/0xe0 dump_stack+0x10/0x20 __might_resched+0x363/0x5a0 ? __alloc_skb+0x165/0x340 __might_sleep+0xad/0x160 ath12k_wmi_cmd_send+0xb1/0x3d0 [ath12k] ? ath12k_wmi_init_wcn7850+0xa40/0xa40 [ath12k] ? __netdev_alloc_skb+0x45/0x7b0 ? __asan_memset+0x39/0x40 ? ath12k_wmi_alloc_skb+0xf0/0x150 [ath12k] ? reacquire_held_locks+0x4d0/0x4d0 ath12k_wmi_set_peer_param+0x340/0x5b0 [ath12k] ath12k_mac_disable_peer_fixed_rate+0xa3/0x110 [ath12k] ? ath12k_mac_vdev_stop+0x4f0/0x4f0 [ath12k] ieee80211_iterate_stations_atomic+0xd4/0x200 [mac80211] ath12k_mac_op_set_bitrate_mask+0x5d2/0x1080 [ath12k] ? ath12k_mac_vif_chan+0x320/0x320 [ath12k] drv_set_bitrate_mask+0x267/0x470 [mac80211] ieee80211_set_bitrate_mask+0x4cc/0x8a0 [mac80211] ? __this_cpu_preempt_check+0x13/0x20 nl80211_set_tx_bitrate_mask+0x2bc/0x530 [cfg80211] ? nl80211_parse_tx_bitrate_mask+0x2320/0x2320 [cfg80211] ? trace_contention_end+0xef/0x140 ? rtnl_unlock+0x9/0x10 ? nl80211_pre_doit+0x557/0x800 [cfg80211] genl_family_rcv_msg_doit+0x1f0/0x2e0 ? 
genl_family_rcv_msg_attrs_parse.isra.0+0x250/0x250 ? ns_capable+0x57/0xd0 genl_family_rcv_msg+0x34c/0x600 ? genl_family_rcv_msg_dumpit+0x310/0x310 ? __lock_acquire+0xc62/0x1de0 ? he_set_mcs_mask.isra.0+0x8d0/0x8d0 [cfg80211] ? nl80211_parse_tx_bitrate_mask+0x2320/0x2320 [cfg80211] ? cfg80211_external_auth_request+0x690/0x690 [cfg80211] genl_rcv_msg+0xa0/0x130 netlink_rcv_skb+0x14c/0x400 ? genl_family_rcv_msg+0x600/0x600 ? netlink_ack+0xd70/0xd70 ? rwsem_optimistic_spin+0x4f0/0x4f0 ? genl_rcv+0x14/0x40 ? down_read_killable+0x580/0x580 ? netlink_deliver_tap+0x13e/0x350 ? __this_cpu_preempt_check+0x13/0x20 genl_rcv+0x23/0x40 netlink_unicast+0x45e/0x790 ? netlink_attachskb+0x7f0/0x7f0 netlink_sendmsg+0x7eb/0xdb0 ? netlink_unicast+0x790/0x790 ? __this_cpu_preempt_check+0x13/0x20 ? selinux_socket_sendmsg+0x31/0x40 ? netlink_unicast+0x790/0x790 __sock_sendmsg+0xc9/0x160 ____sys_sendmsg+0x620/0x990 ? kernel_sendmsg+0x30/0x30 ? __copy_msghdr+0x410/0x410 ? __kasan_check_read+0x11/0x20 ? mark_lock+0xe6/0x1470 ___sys_sendmsg+0xe9/0x170 ? copy_msghdr_from_user+0x120/0x120 ? __lock_acquire+0xc62/0x1de0 ? do_fault_around+0x2c6/0x4e0 ? do_user_addr_fault+0x8c1/0xde0 ? reacquire_held_locks+0x220/0x4d0 ? do_user_addr_fault+0x8c1/0xde0 ? __kasan_check_read+0x11/0x20 ? __fdget+0x4e/0x1d0 ? sockfd_lookup_light+0x1a/0x170 __sys_sendmsg+0xd2/0x180 ? __sys_sendmsg_sock+0x20/0x20 ? reacquire_held_locks+0x4d0/0x4d0 ? debug_smp_processor_id+0x17/0x20 __x64_sys_sendmsg+0x72/0xb0 ? 
lockdep_hardirqs_on+0x7d/0x100 x64_sys_call+0x894/0x9f0 do_syscall_64+0x64/0x130 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f230fe04807 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007ffe996a7ea8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000556f9f9c3390 RCX: 00007f230fe04807 RDX: 0000000000000000 RSI: 00007ffe996a7ee0 RDI: 0000000000000003 RBP: 0000556f9f9c88c0 R08: 0000000000000002 R09: 0000000000000000 R10: 0000556f965ca190 R11: 0000000000000246 R12: 0000556f9f9c8780 R13: 00007ffe996a7ee0 R14: 0000556f9f9c87d0 R15: 0000556f9f9c88c0 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241007165932.78081-2-kvalo@kernel.org Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin (cherry picked from commit 3ed6b2daa4e9029987885f86835ffbc003d11c01) --- drivers/net/wireless/ath/ath12k/mac.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index f324549aeb44f..f04a52e47d8e5 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6676,9 +6676,9 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, arvif->vdev_id, ret); return ret; } - ieee80211_iterate_stations_atomic(hw, - ath12k_mac_disable_peer_fixed_rate, - arvif); + ieee80211_iterate_stations_mtx(hw, + ath12k_mac_disable_peer_fixed_rate, + arvif); } else if (ath12k_mac_bitrate_mask_get_single_nss(ar, band, mask, &single_nss)) { rate = WMI_FIXED_RATE_NONE; @@ -6722,16 +6722,16 @@ ath12k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, return -EINVAL; } - ieee80211_iterate_stations_atomic(hw, - ath12k_mac_disable_peer_fixed_rate, - arvif); + 
ieee80211_iterate_stations_mtx(hw, + ath12k_mac_disable_peer_fixed_rate, + arvif); mutex_lock(&ar->conf_mutex); arvif->bitrate_mask = *mask; - ieee80211_iterate_stations_atomic(hw, - ath12k_mac_set_bitrate_mask_iter, - arvif); + ieee80211_iterate_stations_mtx(hw, + ath12k_mac_set_bitrate_mask_iter, + arvif); mutex_unlock(&ar->conf_mutex); } From af597827fc4e33edb96a25ec783827c32bfa0a7b Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 29 Nov 2023 13:39:28 +0200 Subject: [PATCH 031/216] wifi: ath10k: Update Qualcomm Innovation Center, Inc. copyrights [ Upstream commit b1dc0ba41431147e55407140962c76f3e7a06753 ] Update the copyright for all ath10k files modified on behalf of Qualcomm Innovation Center, Inc. in 2021 through 2023. Signed-off-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231128-ath12kcopyrights-v1-3-be0b7408cbac@quicinc.com Stable-dep-of: 95c38953cb1e ("wifi: ath10k: avoid NULL pointer error during sdio remove") Signed-off-by: Sasha Levin (cherry picked from commit 358c36eae58d72d8bc4a8ce30040591931c489a6) --- drivers/net/wireless/ath/ath10k/bmi.c | 1 + drivers/net/wireless/ath/ath10k/ce.c | 1 + drivers/net/wireless/ath/ath10k/core.c | 1 + drivers/net/wireless/ath/ath10k/core.h | 1 + drivers/net/wireless/ath/ath10k/coredump.c | 1 + drivers/net/wireless/ath/ath10k/coredump.h | 1 + drivers/net/wireless/ath/ath10k/debug.c | 1 + drivers/net/wireless/ath/ath10k/debugfs_sta.c | 1 + drivers/net/wireless/ath/ath10k/htc.c | 1 + drivers/net/wireless/ath/ath10k/htt.h | 1 + drivers/net/wireless/ath/ath10k/htt_rx.c | 1 + drivers/net/wireless/ath/ath10k/htt_tx.c | 1 + drivers/net/wireless/ath/ath10k/hw.c | 1 + drivers/net/wireless/ath/ath10k/hw.h | 1 + drivers/net/wireless/ath/ath10k/mac.c | 1 + drivers/net/wireless/ath/ath10k/pci.c | 1 + drivers/net/wireless/ath/ath10k/pci.h | 1 + drivers/net/wireless/ath/ath10k/qmi.c | 1 + drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.c | 1 + drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.h | 1 + 
drivers/net/wireless/ath/ath10k/rx_desc.h | 1 + drivers/net/wireless/ath/ath10k/sdio.c | 1 + drivers/net/wireless/ath/ath10k/thermal.c | 1 + drivers/net/wireless/ath/ath10k/usb.h | 1 + drivers/net/wireless/ath/ath10k/wmi-tlv.h | 1 + drivers/net/wireless/ath/ath10k/wmi.c | 1 + drivers/net/wireless/ath/ath10k/wmi.h | 1 + drivers/net/wireless/ath/ath10k/wow.c | 1 + 28 files changed, 28 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c index af6546572df26..9a4f8e815412c 100644 --- a/drivers/net/wireless/ath/ath10k/bmi.c +++ b/drivers/net/wireless/ath/ath10k/bmi.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2014,2016-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "bmi.h" diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index c27b8204718a6..afae4a8027f83 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "hif.h" diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 81058be3598f1..c3a8b3496be2a 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 4b5239de40184..cb2359d2ee0b0 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _CORE_H_ diff --git a/drivers/net/wireless/ath/ath10k/coredump.c b/drivers/net/wireless/ath/ath10k/coredump.c index 2d1634a890dde..bb3a276b7ed58 100644 --- a/drivers/net/wireless/ath/ath10k/coredump.c +++ b/drivers/net/wireless/ath/ath10k/coredump.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "coredump.h" diff --git a/drivers/net/wireless/ath/ath10k/coredump.h b/drivers/net/wireless/ath/ath10k/coredump.h index 437b9759f05d3..e5ef0352e319c 100644 --- a/drivers/net/wireless/ath/ath10k/coredump.h +++ b/drivers/net/wireless/ath/ath10k/coredump.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: ISC */ /* * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _COREDUMP_H_ diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index fe89bc61e5317..92ad0a04bcc73 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include diff --git a/drivers/net/wireless/ath/ath10k/debugfs_sta.c b/drivers/net/wireless/ath/ath10k/debugfs_sta.c index 5598cf706daab..0f6de862c3a9b 100644 --- a/drivers/net/wireless/ath/ath10k/debugfs_sta.c +++ b/drivers/net/wireless/ath/ath10k/debugfs_sta.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2014-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "core.h" diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 5bfeecb95fca2..a6e21ce90bad6 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "core.h" diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 7b24297146e72..52f6dc6b81c5e 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, 2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _HTT_H_ diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 438b0caaceb79..51855f23ea266 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "core.h" diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index bd603feb79531..60425d22d7079 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index 6d32b43a4da65..8fafe096adff5 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: ISC /* * Copyright (c) 2014-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 7ecdd0011cfa4..afd336282615c 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _HW_H_ diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index d5e6e11f630b9..655fb5cdf01f8 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "mac.h" diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 23f3662219390..aaa240f3c08a9 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 480cd97ab739d..27bb4cf2dfea9 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -2,6 +2,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _PCI_H_ diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c index 52c1a3de8da60..38e939f572a9e 100644 --- a/drivers/net/wireless/ath/ath10k/qmi.c +++ b/drivers/net/wireless/ath/ath10k/qmi.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: ISC /* * Copyright (c) 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include diff --git a/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.c b/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.c index 1c81e454f943f..0e85c75d22783 100644 --- a/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.c +++ b/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: ISC /* * Copyright (c) 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include diff --git a/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.h b/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.h index f0db991408dc2..9f311f3bc9e7f 100644 --- a/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.h +++ b/drivers/net/wireless/ath/ath10k/qmi_wlfw_v01.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: ISC */ /* * Copyright (c) 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef WCN3990_QMI_SVC_V01_H diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h index 777e53aa69dc8..564293df1e9ac 100644 --- a/drivers/net/wireless/ath/ath10k/rx_desc.h +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h @@ -2,6 +2,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _RX_DESC_H_ diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 56fbcfb80bf88..0ab5433f6cf6f 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -3,6 +3,7 @@ * Copyright (c) 2004-2011 Atheros Communications Inc. * Copyright (c) 2011-2012,2017 Qualcomm Atheros, Inc. * Copyright (c) 2016-2017 Erik Stromdahl + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #include diff --git a/drivers/net/wireless/ath/ath10k/thermal.c b/drivers/net/wireless/ath/ath10k/thermal.c index cefd97323dfe5..31c8d7fbb0955 100644 --- a/drivers/net/wireless/ath/ath10k/thermal.c +++ b/drivers/net/wireless/ath/ath10k/thermal.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: ISC /* * Copyright (c) 2014-2015 Qualcomm Atheros, Inc. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include diff --git a/drivers/net/wireless/ath/ath10k/usb.h b/drivers/net/wireless/ath/ath10k/usb.h index 48e066ba81624..7e4cfbb673c9a 100644 --- a/drivers/net/wireless/ath/ath10k/usb.h +++ b/drivers/net/wireless/ath/ath10k/usb.h @@ -3,6 +3,7 @@ * Copyright (c) 2004-2011 Atheros Communications Inc. * Copyright (c) 2011-2012 Qualcomm Atheros, Inc. * Copyright (c) 2016-2017 Erik Stromdahl + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _USB_H_ diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h index dbb48d70f2e93..83a8f07a687f7 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _WMI_TLV_H #define _WMI_TLV_H diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 1c21dbde77b84..818aea99f85eb 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index b112e88260931..9146df98fceee 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -3,6 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. 
+ * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _WMI_H_ diff --git a/drivers/net/wireless/ath/ath10k/wow.c b/drivers/net/wireless/ath/ath10k/wow.c index 20b9aa8ddf7d5..aa7b2e703f3d4 100644 --- a/drivers/net/wireless/ath/ath10k/wow.c +++ b/drivers/net/wireless/ath/ath10k/wow.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2015-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "mac.h" From 46a9fdf1c245ab15984301b88f1c342762d35960 Mon Sep 17 00:00:00 2001 From: Kang Yang Date: Tue, 8 Oct 2024 10:22:46 +0800 Subject: [PATCH 032/216] wifi: ath10k: avoid NULL pointer error during sdio remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 95c38953cb1ecf40399a676a1f85dfe2b5780a9a ] When running 'rmmod ath10k', ath10k_sdio_remove() will free sdio workqueue by destroy_workqueue(). But if CONFIG_INIT_ON_FREE_DEFAULT_ON is set to yes, kernel panic will happen: Call trace: destroy_workqueue+0x1c/0x258 ath10k_sdio_remove+0x84/0x94 sdio_bus_remove+0x50/0x16c device_release_driver_internal+0x188/0x25c device_driver_detach+0x20/0x2c This is because during 'rmmod ath10k', ath10k_sdio_remove() will call ath10k_core_destroy() before destroy_workqueue(). wiphy_dev_release() will finally be called in ath10k_core_destroy(). This function will free struct cfg80211_registered_device *rdev and all its members, including wiphy, dev and the pointer of sdio workqueue. Then the pointer of sdio workqueue will be set to NULL due to CONFIG_INIT_ON_FREE_DEFAULT_ON. After device release, destroy_workqueue() will use NULL pointer then the kernel panic happen. 
Call trace: ath10k_sdio_remove ->ath10k_core_unregister …… ->ath10k_core_stop ->ath10k_hif_stop ->ath10k_sdio_irq_disable ->ath10k_hif_power_down ->del_timer_sync(&ar_sdio->sleep_timer) ->ath10k_core_destroy ->ath10k_mac_destroy ->ieee80211_free_hw ->wiphy_free …… ->wiphy_dev_release ->destroy_workqueue Need to call destroy_workqueue() before ath10k_core_destroy(), free the work queue buffer first and then free pointer of work queue by ath10k_core_destroy(). This order matches the error path order in ath10k_sdio_probe(). No work will be queued on sdio workqueue between it is destroyed and ath10k_core_destroy() is called. Based on the call_stack above, the reason is: Only ath10k_sdio_sleep_timer_handler(), ath10k_sdio_hif_tx_sg() and ath10k_sdio_irq_disable() will queue work on sdio workqueue. Sleep timer will be deleted before ath10k_core_destroy() in ath10k_hif_power_down(). ath10k_sdio_irq_disable() only be called in ath10k_hif_stop(). ath10k_core_unregister() will call ath10k_hif_power_down() to stop hif bus, so ath10k_sdio_hif_tx_sg() won't be called anymore. Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00189 Signed-off-by: Kang Yang Tested-by: David Ruth Reviewed-by: David Ruth Link: https://patch.msgid.link/20241008022246.1010-1-quic_kangyang@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin (cherry picked from commit b35de9e01fc79c7baac666fb2dcb4ba7698a1d97) --- drivers/net/wireless/ath/ath10k/sdio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 0ab5433f6cf6f..850d999615a2c 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2011 Atheros Communications Inc. * Copyright (c) 2011-2012,2017 Qualcomm Atheros, Inc. * Copyright (c) 2016-2017 Erik Stromdahl - * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -2648,9 +2648,9 @@ static void ath10k_sdio_remove(struct sdio_func *func) netif_napi_del(&ar->napi); - ath10k_core_destroy(ar); - destroy_workqueue(ar_sdio->workqueue); + + ath10k_core_destroy(ar); } static const struct sdio_device_id ath10k_sdio_devices[] = { From 4d223023aa04229332a625b25cbfa3084d4d97ba Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 10 Jun 2024 13:18:01 +0300 Subject: [PATCH 033/216] i2c: i801: Add support for Intel Arrow Lake-H [ Upstream commit f0eda4ddb2146a9f29d31b54c396f741bd0c82f1 ] Add SMBus PCI ID on Intel Arrow Lake-H. Signed-off-by: Jarkko Nikula Signed-off-by: Andi Shyti Stable-dep-of: bd492b583712 ("i2c: i801: Add support for Intel Panther Lake") Signed-off-by: Sasha Levin (cherry picked from commit f38ca98b07211d437ea27413e70391c08ae206b0) --- Documentation/i2c/busses/i2c-i801.rst | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index 10eced6c2e462..c840b597912c8 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -48,6 +48,7 @@ Supported adapters: * Intel Raptor Lake (PCH) * Intel Meteor Lake (SOC and PCH) * Intel Birch Stream (SOC) + * Intel Arrow Lake (SOC) Datasheets: Publicly available at the Intel website diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 0063f1a0fba8d..e810ca993ef20 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -159,6 +159,7 @@ config I2C_I801 Raptor Lake (PCH) Meteor Lake (SOC and PCH) Birch Stream (SOC) + Arrow Lake (SOC) This driver can also be built as a module. If so, the module will be called i2c-i801. 
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 2b8bcd121ffa5..ed943f303cdba 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -80,6 +80,7 @@ * Meteor Lake SoC-S (SOC) 0xae22 32 hard yes yes yes * Meteor Lake PCH-S (PCH) 0x7f23 32 hard yes yes yes * Birch Stream (SOC) 0x5796 32 hard yes yes yes + * Arrow Lake-H (SOC) 0x7722 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -234,6 +235,7 @@ #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_M_SMBUS 0x54a3 #define PCI_DEVICE_ID_INTEL_BIRCH_STREAM_SMBUS 0x5796 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 +#define PCI_DEVICE_ID_INTEL_ARROW_LAKE_H_SMBUS 0x7722 #define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_S_SMBUS 0x7a23 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS 0x7aa3 #define PCI_DEVICE_ID_INTEL_METEOR_LAKE_P_SMBUS 0x7e22 @@ -1046,6 +1048,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_SOC_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_PCH_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, BIRCH_STREAM_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, + { PCI_DEVICE_DATA(INTEL, ARROW_LAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { 0, } }; From f66b9a00046d06ef233eebd67fa66f3dd112d99d Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 23 Sep 2024 16:27:19 +0300 Subject: [PATCH 034/216] i2c: i801: Add support for Intel Panther Lake [ Upstream commit bd492b58371295d3ae26162b9666be584abad68a ] Add SMBus PCI IDs on Intel Panther Lake-P and -U. 
Signed-off-by: Jarkko Nikula Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin (cherry picked from commit 6e6a3479986aa4d71590da3c281df55c3e5fde7b) --- Documentation/i2c/busses/i2c-i801.rst | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index c840b597912c8..47e8ac5b7099f 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -49,6 +49,7 @@ Supported adapters: * Intel Meteor Lake (SOC and PCH) * Intel Birch Stream (SOC) * Intel Arrow Lake (SOC) + * Intel Panther Lake (SOC) Datasheets: Publicly available at the Intel website diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e810ca993ef20..d94f36400ad9c 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -160,6 +160,7 @@ config I2C_I801 Meteor Lake (SOC and PCH) Birch Stream (SOC) Arrow Lake (SOC) + Panther Lake (SOC) This driver can also be built as a module. If so, the module will be called i2c-i801. 
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ed943f303cdba..18c04f5e41d9c 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -81,6 +81,8 @@ * Meteor Lake PCH-S (PCH) 0x7f23 32 hard yes yes yes * Birch Stream (SOC) 0x5796 32 hard yes yes yes * Arrow Lake-H (SOC) 0x7722 32 hard yes yes yes + * Panther Lake-H (SOC) 0xe322 32 hard yes yes yes + * Panther Lake-P (SOC) 0xe422 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -258,6 +260,8 @@ #define PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS 0xa323 #define PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS 0xa3a3 #define PCI_DEVICE_ID_INTEL_METEOR_LAKE_SOC_S_SMBUS 0xae22 +#define PCI_DEVICE_ID_INTEL_PANTHER_LAKE_H_SMBUS 0xe322 +#define PCI_DEVICE_ID_INTEL_PANTHER_LAKE_P_SMBUS 0xe422 struct i801_mux_config { char *gpio_chip; @@ -1049,6 +1053,8 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_PCH_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, BIRCH_STREAM_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, ARROW_LAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, + { PCI_DEVICE_DATA(INTEL, PANTHER_LAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, + { PCI_DEVICE_DATA(INTEL, PANTHER_LAKE_P_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { 0, } }; From 825b78fb8cf925acd8dcf5c387e681876f8266b7 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 1 Oct 2024 09:21:25 +0200 Subject: [PATCH 035/216] Bluetooth: hci_conn: Reduce hci_conn_drop() calls in two functions [ Upstream commit d96b543c6f3b78b6440b68b5a5bbface553eff28 ] An hci_conn_drop() call was immediately used after a null pointer check for an hci_conn_link() call in two function implementations. Thus call such a function only once instead directly before the checks. This issue was transformed by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin (cherry picked from commit ca4e69826d67cafbf13a2675789c3f84f04404a4) --- net/bluetooth/hci_conn.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6178ae8feafc0..549ee9e87d636 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2178,13 +2178,9 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, conn->iso_qos.bcast.big); if (parent && parent != conn) { link = hci_conn_link(parent, conn); - if (!link) { - hci_conn_drop(conn); - return ERR_PTR(-ENOLINK); - } - - /* Link takes the refcount */ hci_conn_drop(conn); + if (!link) + return ERR_PTR(-ENOLINK); } return conn; @@ -2274,15 +2270,12 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, } link = hci_conn_link(le, cis); + hci_conn_drop(cis); if (!link) { hci_conn_drop(le); - hci_conn_drop(cis); return ERR_PTR(-ENOLINK); } - /* Link takes the refcount */ - hci_conn_drop(cis); - cis->state = BT_CONNECT; hci_le_create_cis_pending(hdev); From fc88d93ac86333a68cbd259654ad75774666e67d Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 13 Sep 2023 10:15:26 +0800 Subject: [PATCH 036/216] scsi: hisi_sas: Directly call register snapshot instead of using workqueue [ Upstream commit 2ff07b5c6fe9173e7a7de3b23f300d71ad4d8fde ] Currently, register information dump is performed via workqueue, regardless of the trigger mode (automatic or manual). There is a delay in dumping register through workqueue, the exact register information at trigger time cannot be obtained. Call register snapshot directly instead of through a workqueue. Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1694571327-78697-3-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. 
Petersen Stable-dep-of: 9f564f15f884 ("scsi: hisi_sas: Create all dump files during debugfs initialization") Signed-off-by: Sasha Levin (cherry picked from commit 91e035e98fa1383ca90d774c29bb14c3c5ed2d8c) --- drivers/scsi/hisi_sas/hisi_sas.h | 1 - drivers/scsi/hisi_sas/hisi_sas_main.c | 7 +++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 14 +++----------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 9e73e9cbbcfc6..3d511c44c02d4 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -451,7 +451,6 @@ struct hisi_hba { const struct hisi_sas_hw *hw; /* Low level hw interface */ unsigned long sata_dev_bitmap[BITS_TO_LONGS(HISI_SAS_MAX_DEVICES)]; struct work_struct rst_work; - struct work_struct debugfs_work; u32 phy_state; u32 intr_coal_ticks; /* Time of interrupt coalesce in us */ u32 intr_coal_count; /* Interrupt count to coalesce */ diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index db9ae206974c2..5fdba7b39a1b2 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1967,8 +1967,11 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct hisi_sas_internal_abort_data *timeout = data; - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) - queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + down(&hisi_hba->sem); + hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); + up(&hisi_hba->sem); + } if (task->task_state_flags & SAS_TASK_STATE_DONE) { pr_err("Internal abort: timeout %016llx\n", diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 4054659d48f74..10f048b5a489f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -558,7 
+558,6 @@ static int experimental_iopoll_q_cnt; module_param(experimental_iopoll_q_cnt, int, 0444); MODULE_PARM_DESC(experimental_iopoll_q_cnt, "number of queues to be used as poll mode, def=0"); -static void debugfs_work_handler_v3_hw(struct work_struct *work); static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) @@ -3397,7 +3396,6 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev) hisi_hba = shost_priv(shost); INIT_WORK(&hisi_hba->rst_work, hisi_sas_rst_work_handler); - INIT_WORK(&hisi_hba->debugfs_work, debugfs_work_handler_v3_hw); hisi_hba->hw = &hisi_sas_v3_hw; hisi_hba->pci_dev = pdev; hisi_hba->dev = dev; @@ -3919,7 +3917,9 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, if (buf[0] != '1') return -EFAULT; - queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + down(&hisi_hba->sem); + debugfs_snapshot_regs_v3_hw(hisi_hba); + up(&hisi_hba->sem); return count; } @@ -4670,14 +4670,6 @@ static void debugfs_fifo_init_v3_hw(struct hisi_hba *hisi_hba) } } -static void debugfs_work_handler_v3_hw(struct work_struct *work) -{ - struct hisi_hba *hisi_hba = - container_of(work, struct hisi_hba, debugfs_work); - - debugfs_snapshot_regs_v3_hw(hisi_hba); -} - static void debugfs_release_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { struct device *dev = hisi_hba->dev; From 017c55609b714e28388e543e72f959404427f919 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 13 Sep 2023 10:15:27 +0800 Subject: [PATCH 037/216] scsi: hisi_sas: Allocate DFX memory during dump trigger [ Upstream commit 63f0733d07ce60252e885602b39571ade0441015 ] Currently, if CONFIG_SCSI_HISI_SAS_DEBUGFS_DEFAULT_ENABLE is enabled, the memory space used by DFX is allocated during device initialization, which occupies a large number of memory resources. 
The memory usage before and after the driver is loaded is as follows: Memory usage before the driver is loaded: $ free -m total used free shared buff/cache available Mem: 867352 2578 864037 11 735 861681 Swap: 4095 0 4095 Memory usage after the driver which include 4 HBAs is loaded: $ insmod hisi_sas_v3_hw.ko $ free -m total used free shared buff/cache available Mem: 867352 4760 861848 11 743 859495 Swap: 4095 0 4095 The driver with 4 HBAs connected will allocate about 110 MB of memory without enabling debugfs. Therefore, to avoid wasting memory resources, DFX memory is allocated during dump triggering. The dump may fail due to memory allocation failure. After this change, each dump costs about 10 MB of memory, and each dump lasts about 100 ms. Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1694571327-78697-4-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Stable-dep-of: 9f564f15f884 ("scsi: hisi_sas: Create all dump files during debugfs initialization") Signed-off-by: Sasha Levin (cherry picked from commit 044928679823342272f91feaa5878b720324b803) --- drivers/scsi/hisi_sas/hisi_sas.h | 2 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 93 +++++++++++++------------- 2 files changed, 46 insertions(+), 49 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 3d511c44c02d4..1e4550156b735 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -343,7 +343,7 @@ struct hisi_sas_hw { u8 reg_index, u8 reg_count, u8 *write_data); void (*wait_cmds_complete_timeout)(struct hisi_hba *hisi_hba, int delay_ms, int timeout_ms); - void (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); + int (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); int complete_hdr_size; const struct scsi_host_template *sht; }; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 10f048b5a489f..cea5486556294 100644 --- 
a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -558,7 +558,7 @@ static int experimental_iopoll_q_cnt; module_param(experimental_iopoll_q_cnt, int, 0444); MODULE_PARM_DESC(experimental_iopoll_q_cnt, "number of queues to be used as poll mode, def=0"); -static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); +static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) { @@ -3867,37 +3867,6 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) &debugfs_ras_v3_hw_fops); } -static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) -{ - int debugfs_dump_index = hisi_hba->debugfs_dump_index; - struct device *dev = hisi_hba->dev; - u64 timestamp = local_clock(); - - if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { - dev_warn(dev, "dump count exceeded!\n"); - return; - } - - do_div(timestamp, NSEC_PER_MSEC); - hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; - - debugfs_snapshot_prepare_v3_hw(hisi_hba); - - debugfs_snapshot_global_reg_v3_hw(hisi_hba); - debugfs_snapshot_port_reg_v3_hw(hisi_hba); - debugfs_snapshot_axi_reg_v3_hw(hisi_hba); - debugfs_snapshot_ras_reg_v3_hw(hisi_hba); - debugfs_snapshot_cq_reg_v3_hw(hisi_hba); - debugfs_snapshot_dq_reg_v3_hw(hisi_hba); - debugfs_snapshot_itct_reg_v3_hw(hisi_hba); - debugfs_snapshot_iost_reg_v3_hw(hisi_hba); - - debugfs_create_files_v3_hw(hisi_hba); - - debugfs_snapshot_restore_v3_hw(hisi_hba); - hisi_hba->debugfs_dump_index++; -} - static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) @@ -3905,9 +3874,6 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, struct hisi_hba *hisi_hba = file->f_inode->i_private; char buf[8]; - if (hisi_hba->debugfs_dump_index >= hisi_sas_debugfs_dump_count) - return -EFAULT; - if (count > 8) return -EFAULT; @@ -3918,7 +3884,10 @@ static ssize_t 
debugfs_trigger_dump_v3_hw_write(struct file *file, return -EFAULT; down(&hisi_hba->sem); - debugfs_snapshot_regs_v3_hw(hisi_hba); + if (debugfs_snapshot_regs_v3_hw(hisi_hba)) { + up(&hisi_hba->sem); + return -EFAULT; + } up(&hisi_hba->sem); return count; @@ -4704,7 +4673,7 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { const struct hisi_sas_hw *hw = hisi_hba->hw; struct device *dev = hisi_hba->dev; - int p, c, d, r, i; + int p, c, d, r; size_t sz; for (r = 0; r < DEBUGFS_REGS_NUM; r++) { @@ -4784,11 +4753,48 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index) return 0; fail: - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) - debugfs_release_v3_hw(hisi_hba, i); + debugfs_release_v3_hw(hisi_hba, dump_index); return -ENOMEM; } +static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) +{ + int debugfs_dump_index = hisi_hba->debugfs_dump_index; + struct device *dev = hisi_hba->dev; + u64 timestamp = local_clock(); + + if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { + dev_warn(dev, "dump count exceeded!\n"); + return -EINVAL; + } + + if (debugfs_alloc_v3_hw(hisi_hba, debugfs_dump_index)) { + dev_warn(dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + do_div(timestamp, NSEC_PER_MSEC); + hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; + + debugfs_snapshot_prepare_v3_hw(hisi_hba); + + debugfs_snapshot_global_reg_v3_hw(hisi_hba); + debugfs_snapshot_port_reg_v3_hw(hisi_hba); + debugfs_snapshot_axi_reg_v3_hw(hisi_hba); + debugfs_snapshot_ras_reg_v3_hw(hisi_hba); + debugfs_snapshot_cq_reg_v3_hw(hisi_hba); + debugfs_snapshot_dq_reg_v3_hw(hisi_hba); + debugfs_snapshot_itct_reg_v3_hw(hisi_hba); + debugfs_snapshot_iost_reg_v3_hw(hisi_hba); + + debugfs_create_files_v3_hw(hisi_hba); + + debugfs_snapshot_restore_v3_hw(hisi_hba); + hisi_hba->debugfs_dump_index++; + + return 0; +} + static void debugfs_phy_down_cnt_init_v3_hw(struct hisi_hba *hisi_hba) { struct dentry *dir = 
debugfs_create_dir("phy_down_cnt", @@ -4875,7 +4881,6 @@ static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; - int i; hisi_hba->debugfs_dir = debugfs_create_dir(dev_name(dev), hisi_sas_debugfs_dir); @@ -4892,14 +4897,6 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) debugfs_phy_down_cnt_init_v3_hw(hisi_hba); debugfs_fifo_init_v3_hw(hisi_hba); - - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) { - if (debugfs_alloc_v3_hw(hisi_hba, i)) { - debugfs_exit_v3_hw(hisi_hba); - dev_dbg(dev, "failed to init debugfs!\n"); - break; - } - } } static int From 117929d7307bc2626a30989489c9c3bdcc644f1c Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 8 Oct 2024 10:18:21 +0800 Subject: [PATCH 038/216] scsi: hisi_sas: Create all dump files during debugfs initialization [ Upstream commit 9f564f15f88490b484e02442dc4c4b11640ea172 ] For the current debugfs of hisi_sas, after the user triggers a dump, the driver allocates memory space to save the register information and creates debugfs files to display the saved information. In this process, the debugfs files are created after each dump.
Therefore, when the dump is triggered while the driver is unbind, the following hang occurs: [67840.853907] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [67840.862947] Mem abort info: [67840.865855] ESR = 0x0000000096000004 [67840.869713] EC = 0x25: DABT (current EL), IL = 32 bits [67840.875125] SET = 0, FnV = 0 [67840.878291] EA = 0, S1PTW = 0 [67840.881545] FSC = 0x04: level 0 translation fault [67840.886528] Data abort info: [67840.889524] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [67840.895117] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [67840.900284] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [67840.905709] user pgtable: 4k pages, 48-bit VAs, pgdp=0000002803a1f000 [67840.912263] [00000000000000a0] pgd=0000000000000000, p4d=0000000000000000 [67840.919177] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [67840.996435] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [67841.003628] pc : down_write+0x30/0x98 [67841.007546] lr : start_creating.part.0+0x60/0x198 [67841.012495] sp : ffff8000b979ba20 [67841.016046] x29: ffff8000b979ba20 x28: 0000000000000010 x27: 0000000000024b40 [67841.023412] x26: 0000000000000012 x25: ffff20202b355ae8 x24: ffff20202b35a8c8 [67841.030779] x23: ffffa36877928208 x22: ffffa368b4972240 x21: ffff8000b979bb18 [67841.038147] x20: ffff00281dc1e3c0 x19: fffffffffffffffe x18: 0000000000000020 [67841.045515] x17: 0000000000000000 x16: ffffa368b128a530 x15: ffffffffffffffff [67841.052888] x14: ffff8000b979bc18 x13: ffffffffffffffff x12: ffff8000b979bb18 [67841.060263] x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa368b1289b18 [67841.067640] x8 : 0000000000000012 x7 : 0000000000000000 x6 : 00000000000003a9 [67841.075014] x5 : 0000000000000000 x4 : ffff002818c5cb00 x3 : 0000000000000001 [67841.082388] x2 : 0000000000000000 x1 : ffff002818c5cb00 x0 : 00000000000000a0 [67841.089759] Call trace: [67841.092456] down_write+0x30/0x98 [67841.096017] start_creating.part.0+0x60/0x198 
[67841.100613] debugfs_create_dir+0x48/0x1f8 [67841.104950] debugfs_create_files_v3_hw+0x88/0x348 [hisi_sas_v3_hw] [67841.111447] debugfs_snapshot_regs_v3_hw+0x708/0x798 [hisi_sas_v3_hw] [67841.118111] debugfs_trigger_dump_v3_hw_write+0x9c/0x120 [hisi_sas_v3_hw] [67841.125115] full_proxy_write+0x68/0xc8 [67841.129175] vfs_write+0xd8/0x3f0 [67841.132708] ksys_write+0x70/0x108 [67841.136317] __arm64_sys_write+0x24/0x38 [67841.140440] invoke_syscall+0x50/0x128 [67841.144385] el0_svc_common.constprop.0+0xc8/0xf0 [67841.149273] do_el0_svc+0x24/0x38 [67841.152773] el0_svc+0x38/0xd8 [67841.156009] el0t_64_sync_handler+0xc0/0xc8 [67841.160361] el0t_64_sync+0x1a4/0x1a8 [67841.164189] Code: b9000882 d2800002 d2800023 f9800011 (c85ffc05) [67841.170443] ---[ end trace 0000000000000000 ]--- To fix this issue, create all directories and files during debugfs initialization. In this way, the driver only needs to allocate memory space to save information each time the user triggers dumping. Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20241008021822.2617339-13-liyihang9@huawei.com Reviewed-by: Xingui Yang Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (cherry picked from commit 7c8c50c9855a9e1b0d1e3680e5ad839002a9deb5) --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 99 ++++++++++++++++++++------ 1 file changed, 77 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index cea5486556294..ff5f86867dbf0 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3560,6 +3560,11 @@ debugfs_to_reg_name_v3_hw(int off, int base_off, return NULL; } +static bool debugfs_dump_is_generated_v3_hw(void *p) +{ + return p ? 
true : false; +} + static void debugfs_print_reg_v3_hw(u32 *regs_val, struct seq_file *s, const struct hisi_sas_debugfs_reg *reg) { @@ -3585,6 +3590,9 @@ static int debugfs_global_v3_hw_show(struct seq_file *s, void *p) { struct hisi_sas_debugfs_regs *global = s->private; + if (!debugfs_dump_is_generated_v3_hw(global->data)) + return -EPERM; + debugfs_print_reg_v3_hw(global->data, s, &debugfs_global_reg); @@ -3596,6 +3604,9 @@ static int debugfs_axi_v3_hw_show(struct seq_file *s, void *p) { struct hisi_sas_debugfs_regs *axi = s->private; + if (!debugfs_dump_is_generated_v3_hw(axi->data)) + return -EPERM; + debugfs_print_reg_v3_hw(axi->data, s, &debugfs_axi_reg); @@ -3607,6 +3618,9 @@ static int debugfs_ras_v3_hw_show(struct seq_file *s, void *p) { struct hisi_sas_debugfs_regs *ras = s->private; + if (!debugfs_dump_is_generated_v3_hw(ras->data)) + return -EPERM; + debugfs_print_reg_v3_hw(ras->data, s, &debugfs_ras_reg); @@ -3619,6 +3633,9 @@ static int debugfs_port_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_debugfs_port *port = s->private; const struct hisi_sas_debugfs_reg *reg_port = &debugfs_port_reg; + if (!debugfs_dump_is_generated_v3_hw(port->data)) + return -EPERM; + debugfs_print_reg_v3_hw(port->data, s, reg_port); return 0; @@ -3674,6 +3691,9 @@ static int debugfs_cq_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_debugfs_cq *debugfs_cq = s->private; int slot; + if (!debugfs_dump_is_generated_v3_hw(debugfs_cq->complete_hdr)) + return -EPERM; + for (slot = 0; slot < HISI_SAS_QUEUE_SLOTS; slot++) debugfs_cq_show_slot_v3_hw(s, slot, debugfs_cq); @@ -3695,8 +3715,12 @@ static void debugfs_dq_show_slot_v3_hw(struct seq_file *s, int slot, static int debugfs_dq_v3_hw_show(struct seq_file *s, void *p) { + struct hisi_sas_debugfs_dq *debugfs_dq = s->private; int slot; + if (!debugfs_dump_is_generated_v3_hw(debugfs_dq->hdr)) + return -EPERM; + for (slot = 0; slot < HISI_SAS_QUEUE_SLOTS; slot++) debugfs_dq_show_slot_v3_hw(s, slot, s->private); @@ 
-3710,6 +3734,9 @@ static int debugfs_iost_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_iost *iost = debugfs_iost->iost; int i, max_command_entries = HISI_SAS_MAX_COMMANDS; + if (!debugfs_dump_is_generated_v3_hw(iost)) + return -EPERM; + for (i = 0; i < max_command_entries; i++, iost++) { __le64 *data = &iost->qw0; @@ -3729,6 +3756,9 @@ static int debugfs_iost_cache_v3_hw_show(struct seq_file *s, void *p) int i, tab_idx; __le64 *iost; + if (!debugfs_dump_is_generated_v3_hw(iost_cache)) + return -EPERM; + for (i = 0; i < HISI_SAS_IOST_ITCT_CACHE_NUM; i++, iost_cache++) { /* * Data struct of IOST cache: @@ -3752,6 +3782,9 @@ static int debugfs_itct_v3_hw_show(struct seq_file *s, void *p) struct hisi_sas_debugfs_itct *debugfs_itct = s->private; struct hisi_sas_itct *itct = debugfs_itct->itct; + if (!debugfs_dump_is_generated_v3_hw(itct)) + return -EPERM; + for (i = 0; i < HISI_SAS_MAX_ITCT_ENTRIES; i++, itct++) { __le64 *data = &itct->qw0; @@ -3771,6 +3804,9 @@ static int debugfs_itct_cache_v3_hw_show(struct seq_file *s, void *p) int i, tab_idx; __le64 *itct; + if (!debugfs_dump_is_generated_v3_hw(itct_cache)) + return -EPERM; + for (i = 0; i < HISI_SAS_IOST_ITCT_CACHE_NUM; i++, itct_cache++) { /* * Data struct of ITCT cache: @@ -3788,10 +3824,9 @@ static int debugfs_itct_cache_v3_hw_show(struct seq_file *s, void *p) } DEFINE_SHOW_ATTRIBUTE(debugfs_itct_cache_v3_hw); -static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) +static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba, int index) { u64 *debugfs_timestamp; - int dump_index = hisi_hba->debugfs_dump_index; struct dentry *dump_dentry; struct dentry *dentry; char name[256]; @@ -3799,17 +3834,17 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) int c; int d; - snprintf(name, 256, "%d", dump_index); + snprintf(name, 256, "%d", index); dump_dentry = debugfs_create_dir(name, hisi_hba->debugfs_dump_dentry); - debugfs_timestamp = 
&hisi_hba->debugfs_timestamp[dump_index]; + debugfs_timestamp = &hisi_hba->debugfs_timestamp[index]; debugfs_create_u64("timestamp", 0400, dump_dentry, debugfs_timestamp); debugfs_create_file("global", 0400, dump_dentry, - &hisi_hba->debugfs_regs[dump_index][DEBUGFS_GLOBAL], + &hisi_hba->debugfs_regs[index][DEBUGFS_GLOBAL], &debugfs_global_v3_hw_fops); /* Create port dir and files */ @@ -3818,7 +3853,7 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) snprintf(name, 256, "%d", p); debugfs_create_file(name, 0400, dentry, - &hisi_hba->debugfs_port_reg[dump_index][p], + &hisi_hba->debugfs_port_reg[index][p], &debugfs_port_v3_hw_fops); } @@ -3828,7 +3863,7 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) snprintf(name, 256, "%d", c); debugfs_create_file(name, 0400, dentry, - &hisi_hba->debugfs_cq[dump_index][c], + &hisi_hba->debugfs_cq[index][c], &debugfs_cq_v3_hw_fops); } @@ -3838,32 +3873,32 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) snprintf(name, 256, "%d", d); debugfs_create_file(name, 0400, dentry, - &hisi_hba->debugfs_dq[dump_index][d], + &hisi_hba->debugfs_dq[index][d], &debugfs_dq_v3_hw_fops); } debugfs_create_file("iost", 0400, dump_dentry, - &hisi_hba->debugfs_iost[dump_index], + &hisi_hba->debugfs_iost[index], &debugfs_iost_v3_hw_fops); debugfs_create_file("iost_cache", 0400, dump_dentry, - &hisi_hba->debugfs_iost_cache[dump_index], + &hisi_hba->debugfs_iost_cache[index], &debugfs_iost_cache_v3_hw_fops); debugfs_create_file("itct", 0400, dump_dentry, - &hisi_hba->debugfs_itct[dump_index], + &hisi_hba->debugfs_itct[index], &debugfs_itct_v3_hw_fops); debugfs_create_file("itct_cache", 0400, dump_dentry, - &hisi_hba->debugfs_itct_cache[dump_index], + &hisi_hba->debugfs_itct_cache[index], &debugfs_itct_cache_v3_hw_fops); debugfs_create_file("axi", 0400, dump_dentry, - &hisi_hba->debugfs_regs[dump_index][DEBUGFS_AXI], + &hisi_hba->debugfs_regs[index][DEBUGFS_AXI], &debugfs_axi_v3_hw_fops); 
debugfs_create_file("ras", 0400, dump_dentry, - &hisi_hba->debugfs_regs[dump_index][DEBUGFS_RAS], + &hisi_hba->debugfs_regs[index][DEBUGFS_RAS], &debugfs_ras_v3_hw_fops); } @@ -4645,22 +4680,34 @@ static void debugfs_release_v3_hw(struct hisi_hba *hisi_hba, int dump_index) int i; devm_kfree(dev, hisi_hba->debugfs_iost_cache[dump_index].cache); + hisi_hba->debugfs_iost_cache[dump_index].cache = NULL; devm_kfree(dev, hisi_hba->debugfs_itct_cache[dump_index].cache); + hisi_hba->debugfs_itct_cache[dump_index].cache = NULL; devm_kfree(dev, hisi_hba->debugfs_iost[dump_index].iost); + hisi_hba->debugfs_iost[dump_index].iost = NULL; devm_kfree(dev, hisi_hba->debugfs_itct[dump_index].itct); + hisi_hba->debugfs_itct[dump_index].itct = NULL; - for (i = 0; i < hisi_hba->queue_count; i++) + for (i = 0; i < hisi_hba->queue_count; i++) { devm_kfree(dev, hisi_hba->debugfs_dq[dump_index][i].hdr); + hisi_hba->debugfs_dq[dump_index][i].hdr = NULL; + } - for (i = 0; i < hisi_hba->queue_count; i++) + for (i = 0; i < hisi_hba->queue_count; i++) { devm_kfree(dev, hisi_hba->debugfs_cq[dump_index][i].complete_hdr); + hisi_hba->debugfs_cq[dump_index][i].complete_hdr = NULL; + } - for (i = 0; i < DEBUGFS_REGS_NUM; i++) + for (i = 0; i < DEBUGFS_REGS_NUM; i++) { devm_kfree(dev, hisi_hba->debugfs_regs[dump_index][i].data); + hisi_hba->debugfs_regs[dump_index][i].data = NULL; + } - for (i = 0; i < hisi_hba->n_phy; i++) + for (i = 0; i < hisi_hba->n_phy; i++) { devm_kfree(dev, hisi_hba->debugfs_port_reg[dump_index][i].data); + hisi_hba->debugfs_port_reg[dump_index][i].data = NULL; + } } static const struct hisi_sas_debugfs_reg *debugfs_reg_array_v3_hw[DEBUGFS_REGS_NUM] = { @@ -4787,8 +4834,6 @@ static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) debugfs_snapshot_itct_reg_v3_hw(hisi_hba); debugfs_snapshot_iost_reg_v3_hw(hisi_hba); - debugfs_create_files_v3_hw(hisi_hba); - debugfs_snapshot_restore_v3_hw(hisi_hba); hisi_hba->debugfs_dump_index++; @@ -4872,6 +4917,17 @@ static void 
debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->debugfs_bist_linkrate = SAS_LINK_RATE_1_5_GBPS; } +static void debugfs_dump_init_v3_hw(struct hisi_hba *hisi_hba) +{ + int i; + + hisi_hba->debugfs_dump_dentry = + debugfs_create_dir("dump", hisi_hba->debugfs_dir); + + for (i = 0; i < hisi_sas_debugfs_dump_count; i++) + debugfs_create_files_v3_hw(hisi_hba, i); +} + static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) { debugfs_remove_recursive(hisi_hba->debugfs_dir); @@ -4892,8 +4948,7 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) /* create bist structures */ debugfs_bist_init_v3_hw(hisi_hba); - hisi_hba->debugfs_dump_dentry = - debugfs_create_dir("dump", hisi_hba->debugfs_dir); + debugfs_dump_init_v3_hw(hisi_hba); debugfs_phy_down_cnt_init_v3_hw(hisi_hba); debugfs_fifo_init_v3_hw(hisi_hba); From f5679ffbd46a20200fe356025117a16b4b9cd18c Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Fri, 2 Feb 2024 20:34:41 +0200 Subject: [PATCH 039/216] clk: qcom: clk-alpha-pll: Add support for zonda ole pll configure [ Upstream commit c32f4f4ae1c6035b44bb4ca7a41fa4fd51244597 ] Zonda ole pll has an extra PLL_OFF_CONFIG_CTL_U2 register, hence add support for it.
Signed-off-by: Rajendra Nayak Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20240202-x1e80100-clock-controllers-v4-6-7fb08c861c7c@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 79dfed29aa3f ("clk: qcom: clk-alpha-pll: Add NSS HUAYRA ALPHA PLL support for ipq9574") Signed-off-by: Sasha Levin (cherry picked from commit deff81f56dff602b193265a80c850d555810511c) --- drivers/clk/qcom/clk-alpha-pll.c | 16 ++++++++++++++++ drivers/clk/qcom/clk-alpha-pll.h | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 8b3e5f84e89a7..87040b949eb41 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -52,6 +52,7 @@ #define PLL_CONFIG_CTL(p) ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL]) #define PLL_CONFIG_CTL_U(p) ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U]) #define PLL_CONFIG_CTL_U1(p) ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U1]) +#define PLL_CONFIG_CTL_U2(p) ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U2]) #define PLL_TEST_CTL(p) ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL]) #define PLL_TEST_CTL_U(p) ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL_U]) #define PLL_TEST_CTL_U1(p) ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL_U1]) @@ -227,6 +228,21 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [PLL_OFF_ALPHA_VAL] = 0x24, [PLL_OFF_ALPHA_VAL_U] = 0x28, }, + [CLK_ALPHA_PLL_TYPE_ZONDA_OLE] = { + [PLL_OFF_L_VAL] = 0x04, + [PLL_OFF_ALPHA_VAL] = 0x08, + [PLL_OFF_USER_CTL] = 0x0c, + [PLL_OFF_USER_CTL_U] = 0x10, + [PLL_OFF_CONFIG_CTL] = 0x14, + [PLL_OFF_CONFIG_CTL_U] = 0x18, + [PLL_OFF_CONFIG_CTL_U1] = 0x1c, + [PLL_OFF_CONFIG_CTL_U2] = 0x20, + [PLL_OFF_TEST_CTL] = 0x24, + [PLL_OFF_TEST_CTL_U] = 0x28, + [PLL_OFF_TEST_CTL_U1] = 0x2c, + [PLL_OFF_OPMODE] = 0x30, + [PLL_OFF_STATUS] = 0x3c, + }, }; EXPORT_SYMBOL_GPL(clk_alpha_pll_regs); diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h index 3fd0ef41c72c8..f50de33a045d1 100644 --- 
a/drivers/clk/qcom/clk-alpha-pll.h +++ b/drivers/clk/qcom/clk-alpha-pll.h @@ -21,6 +21,7 @@ enum { CLK_ALPHA_PLL_TYPE_LUCID = CLK_ALPHA_PLL_TYPE_TRION, CLK_ALPHA_PLL_TYPE_AGERA, CLK_ALPHA_PLL_TYPE_ZONDA, + CLK_ALPHA_PLL_TYPE_ZONDA_OLE, CLK_ALPHA_PLL_TYPE_LUCID_EVO, CLK_ALPHA_PLL_TYPE_LUCID_OLE, CLK_ALPHA_PLL_TYPE_RIVIAN_EVO, @@ -42,6 +43,7 @@ enum { PLL_OFF_CONFIG_CTL, PLL_OFF_CONFIG_CTL_U, PLL_OFF_CONFIG_CTL_U1, + PLL_OFF_CONFIG_CTL_U2, PLL_OFF_TEST_CTL, PLL_OFF_TEST_CTL_U, PLL_OFF_TEST_CTL_U1, @@ -119,6 +121,7 @@ struct alpha_pll_config { u32 config_ctl_val; u32 config_ctl_hi_val; u32 config_ctl_hi1_val; + u32 config_ctl_hi2_val; u32 user_ctl_val; u32 user_ctl_hi_val; u32 user_ctl_hi1_val; @@ -173,6 +176,7 @@ extern const struct clk_ops clk_alpha_pll_postdiv_lucid_5lpe_ops; extern const struct clk_ops clk_alpha_pll_zonda_ops; #define clk_alpha_pll_postdiv_zonda_ops clk_alpha_pll_postdiv_fabia_ops +#define clk_alpha_pll_zonda_ole_ops clk_alpha_pll_zonda_ops extern const struct clk_ops clk_alpha_pll_lucid_evo_ops; extern const struct clk_ops clk_alpha_pll_reset_lucid_evo_ops; From d5d06ae1c4520fd3f4566b8598e12f7422620f6f Mon Sep 17 00:00:00 2001 From: Devi Priya Date: Mon, 28 Oct 2024 11:35:01 +0530 Subject: [PATCH 040/216] clk: qcom: clk-alpha-pll: Add NSS HUAYRA ALPHA PLL support for ipq9574 [ Upstream commit 79dfed29aa3f714e0a94a39b2bfe9ac14ce19a6a ] Add support for NSS Huayra alpha pll found on ipq9574 SoCs. Programming sequence is the same as that of Huayra type Alpha PLL, so we can re-use the same. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Devi Priya Link: https://lore.kernel.org/r/20241028060506.246606-2-quic_srichara@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin (cherry picked from commit 82461d89c849f652010c6f89c8c0be89cfb8cc6e) --- drivers/clk/qcom/clk-alpha-pll.c | 11 +++++++++++ drivers/clk/qcom/clk-alpha-pll.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 87040b949eb41..ce44dbfd47e27 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -243,6 +243,17 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [PLL_OFF_OPMODE] = 0x30, [PLL_OFF_STATUS] = 0x3c, }, + [CLK_ALPHA_PLL_TYPE_NSS_HUAYRA] = { + [PLL_OFF_L_VAL] = 0x04, + [PLL_OFF_ALPHA_VAL] = 0x08, + [PLL_OFF_TEST_CTL] = 0x0c, + [PLL_OFF_TEST_CTL_U] = 0x10, + [PLL_OFF_USER_CTL] = 0x14, + [PLL_OFF_CONFIG_CTL] = 0x18, + [PLL_OFF_CONFIG_CTL_U] = 0x1c, + [PLL_OFF_STATUS] = 0x20, + }, + }; EXPORT_SYMBOL_GPL(clk_alpha_pll_regs); diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h index f50de33a045d1..52dc5b9b546a1 100644 --- a/drivers/clk/qcom/clk-alpha-pll.h +++ b/drivers/clk/qcom/clk-alpha-pll.h @@ -29,6 +29,7 @@ enum { CLK_ALPHA_PLL_TYPE_BRAMMO_EVO, CLK_ALPHA_PLL_TYPE_STROMER, CLK_ALPHA_PLL_TYPE_STROMER_PLUS, + CLK_ALPHA_PLL_TYPE_NSS_HUAYRA, CLK_ALPHA_PLL_TYPE_MAX, }; From a9e3fa6d4736ced59ec4ce5ebd5f3ab37db8b6ab Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 1 Aug 2023 14:38:26 +0800 Subject: [PATCH 041/216] mailbox: pcc: Add support for platform notification handling [ Upstream commit 60c40b06fa68694dd08a1a0038ea8b9de3f3b1ca ] Currently, PCC driver doesn't support the processing of platform notification for type 4 PCC subspaces. According to ACPI specification, if platform sends a notification to OSPM, it must clear the command complete bit and trigger platform interrupt. 
OSPM needs to check whether the command complete bit is cleared, clear platform interrupt, process command, and then set the command complete and ring doorbell to the Platform. Let us stash the value of the pcc type and use the same while processing the interrupt of the channel. We also need to set the command complete bit and ring doorbell in the interrupt handler for the type 4 channel to complete the communication flow after processing the notification from the Platform. Signed-off-by: Huisong Li Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/20230801063827.25336-2-lihuisong@huawei.com Signed-off-by: Sudeep Holla Stable-dep-of: 7f9e19f207be ("mailbox: pcc: Check before sending MCTP PCC response ACK") Signed-off-by: Sasha Levin (cherry picked from commit 605018764e213917e614efdebcb7814332aa988a) --- drivers/mailbox/pcc.c | 50 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index a44d4b3e5beb2..80310b48bfb6a 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -91,6 +91,7 @@ struct pcc_chan_reg { * @cmd_update: PCC register bundle for the command complete update register * @error: PCC register bundle for the error status register * @plat_irq: platform interrupt + * @type: PCC subspace type */ struct pcc_chan_info { struct pcc_mbox_chan chan; @@ -100,12 +101,15 @@ struct pcc_chan_info { struct pcc_chan_reg cmd_update; struct pcc_chan_reg error; int plat_irq; + u8 type; }; #define to_pcc_chan_info(c) container_of(c, struct pcc_chan_info, chan) static struct pcc_chan_info *chan_info; static int pcc_chan_count; +static int pcc_send_data(struct mbox_chan *chan, void *data); + /* * PCC can be used with perf critical drivers such as CPPC * So it makes sense to locally cache the virtual address and @@ -221,6 +225,34 @@ static int pcc_map_interrupt(u32 interrupt, u32 flags) return acpi_register_gsi(NULL, interrupt, trigger, polarity); } +static bool 
pcc_mbox_cmd_complete_check(struct pcc_chan_info *pchan) +{ + u64 val; + int ret; + + ret = pcc_chan_reg_read(&pchan->cmd_complete, &val); + if (ret) + return false; + + if (!pchan->cmd_complete.gas) + return true; + + /* + * Judge if the channel respond the interrupt based on the value of + * command complete. + */ + val &= pchan->cmd_complete.status_mask; + /* + * If this is PCC slave subspace channel, and the command complete + * bit 0 indicates that Platform is sending a notification and OSPM + * needs to respond this interrupt to process this command. + */ + if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE) + return !val; + + return !!val; +} + /** * pcc_mbox_irq - PCC mailbox interrupt handler * @irq: interrupt number @@ -236,17 +268,9 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) int ret; pchan = chan->con_priv; - - ret = pcc_chan_reg_read(&pchan->cmd_complete, &val); - if (ret) + if (!pcc_mbox_cmd_complete_check(pchan)) return IRQ_NONE; - if (val) { /* Ensure GAS exists and value is non-zero */ - val &= pchan->cmd_complete.status_mask; - if (!val) - return IRQ_NONE; - } - ret = pcc_chan_reg_read(&pchan->error, &val); if (ret) return IRQ_NONE; @@ -262,6 +286,13 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) mbox_chan_received_data(chan, NULL); + /* + * The PCC slave subspace channel needs to set the command complete bit + * and ring doorbell after processing message. 
+ */ + if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE) + pcc_send_data(chan, NULL); + return IRQ_HANDLED; } @@ -698,6 +729,7 @@ static int pcc_mbox_probe(struct platform_device *pdev) pcc_parse_subspace_shmem(pchan, pcct_entry); + pchan->type = pcct_entry->type; pcct_entry = (struct acpi_subtable_header *) ((unsigned long) pcct_entry + pcct_entry->length); } From 93128e2ea3af4d738e7221ab3f6c6a2f81b87009 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 1 Aug 2023 14:38:27 +0800 Subject: [PATCH 042/216] mailbox: pcc: Support shared interrupt for multiple subspaces [ Upstream commit 3db174e478cb0bb34888c20a531608b70aec9c1f ] If the platform acknowledge interrupt is level triggered, then it can be shared by multiple subspaces provided each one has a unique platform interrupt ack preserve and ack set masks. If it can be shared, then we can request the irq with IRQF_SHARED and IRQF_ONESHOT flags. The first one indicating it can be shared and the latter one to keep the interrupt disabled until the hardirq handler finished. Further, since there is no way to detect if the interrupt is for a given channel as the interrupt ack preserve and ack set masks are for clearing the interrupt and not for reading the status(in case Irq Ack register may be write-only on some platforms), we need a way to identify if the given channel is in use and expecting the interrupt. PCC type0, type1 and type5 do not support shared level triggered interrupt. The methods of determining whether a given channel for remaining types should respond to an interrupt are as follows: - type2: Whether the interrupt belongs to a given channel is only determined by the status field in Generic Communications Channel Shared Memory Region, which is done in rx_callback of PCC client. - type3: This channel checks chan_in_use flag first and then checks the command complete bit(value '1' indicates that the command has been completed). 
- type4: Platform ensures that the default value of the command complete bit corresponding to the type4 channel is '1'. This command complete bit is '0' when a platform notification is received. The new field, 'chan_in_use', is used by the types that only support communication from OSPM to Platform (like type3) and should be completely ignored by other types so as to avoid too many unnecessary type checks in the IRQ handler. Signed-off-by: Huisong Li Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/20230801063827.25336-3-lihuisong@huawei.com Signed-off-by: Sudeep Holla Stable-dep-of: 7f9e19f207be ("mailbox: pcc: Check before sending MCTP PCC response ACK") Signed-off-by: Sasha Levin (cherry picked from commit dcc02c9ebfe8c8132e190db42a8f9be6c24353f6) --- drivers/mailbox/pcc.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 80310b48bfb6a..94885e411085a 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -92,6 +92,13 @@ struct pcc_chan_reg { * @error: PCC register bundle for the error status register * @plat_irq: platform interrupt * @type: PCC subspace type + * @plat_irq_flags: platform interrupt flags + * @chan_in_use: this flag is used just to check if the interrupt needs + * handling when it is shared. Since only one transfer can occur + * at a time and mailbox takes care of locking, this flag can be + * accessed without a lock. Note: the type only support the + * communication from OSPM to Platform, like type3, use it, and + * other types completely ignore it.
*/ struct pcc_chan_info { struct pcc_mbox_chan chan; @@ -102,6 +109,8 @@ struct pcc_chan_info { struct pcc_chan_reg error; int plat_irq; u8 type; + unsigned int plat_irq_flags; + bool chan_in_use; }; #define to_pcc_chan_info(c) container_of(c, struct pcc_chan_info, chan) @@ -225,6 +234,12 @@ static int pcc_map_interrupt(u32 interrupt, u32 flags) return acpi_register_gsi(NULL, interrupt, trigger, polarity); } +static bool pcc_chan_plat_irq_can_be_shared(struct pcc_chan_info *pchan) +{ + return (pchan->plat_irq_flags & ACPI_PCCT_INTERRUPT_MODE) == + ACPI_LEVEL_SENSITIVE; +} + static bool pcc_mbox_cmd_complete_check(struct pcc_chan_info *pchan) { u64 val; @@ -242,6 +257,7 @@ static bool pcc_mbox_cmd_complete_check(struct pcc_chan_info *pchan) * command complete. */ val &= pchan->cmd_complete.status_mask; + /* * If this is PCC slave subspace channel, and the command complete * bit 0 indicates that Platform is sending a notification and OSPM @@ -268,6 +284,10 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) int ret; pchan = chan->con_priv; + if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_MASTER_SUBSPACE && + !pchan->chan_in_use) + return IRQ_NONE; + if (!pcc_mbox_cmd_complete_check(pchan)) return IRQ_NONE; @@ -289,9 +309,12 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) /* * The PCC slave subspace channel needs to set the command complete bit * and ring doorbell after processing message. + * + * The PCC master subspace channel clears chan_in_use to free channel. 
*/ if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE) pcc_send_data(chan, NULL); + pchan->chan_in_use = false; return IRQ_HANDLED; } @@ -371,7 +394,11 @@ static int pcc_send_data(struct mbox_chan *chan, void *data) if (ret) return ret; - return pcc_chan_reg_read_modify_write(&pchan->db); + ret = pcc_chan_reg_read_modify_write(&pchan->db); + if (!ret && pchan->plat_irq > 0) + pchan->chan_in_use = true; + + return ret; } /** @@ -384,11 +411,14 @@ static int pcc_send_data(struct mbox_chan *chan, void *data) static int pcc_startup(struct mbox_chan *chan) { struct pcc_chan_info *pchan = chan->con_priv; + unsigned long irqflags; int rc; if (pchan->plat_irq > 0) { - rc = devm_request_irq(chan->mbox->dev, pchan->plat_irq, pcc_mbox_irq, 0, - MBOX_IRQ_NAME, chan); + irqflags = pcc_chan_plat_irq_can_be_shared(pchan) ? + IRQF_SHARED | IRQF_ONESHOT : 0; + rc = devm_request_irq(chan->mbox->dev, pchan->plat_irq, pcc_mbox_irq, + irqflags, MBOX_IRQ_NAME, chan); if (unlikely(rc)) { dev_err(chan->mbox->dev, "failed to register PCC interrupt %d\n", pchan->plat_irq); @@ -494,6 +524,7 @@ static int pcc_parse_subspace_irq(struct pcc_chan_info *pchan, pcct_ss->platform_interrupt); return -EINVAL; } + pchan->plat_irq_flags = pcct_ss->flags; if (pcct_ss->header.type == ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2) { struct acpi_pcct_hw_reduced_type2 *pcct2_ss = (void *)pcct_ss; @@ -515,6 +546,12 @@ static int pcc_parse_subspace_irq(struct pcc_chan_info *pchan, "PLAT IRQ ACK"); } + if (pcc_chan_plat_irq_can_be_shared(pchan) && + !pchan->plat_irq_ack.gas) { + pr_err("PCC subspace has level IRQ with no ACK register\n"); + return -EINVAL; + } + return ret; } From e327e12b82076a7f7f818cead66b38881dc311c8 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 27 Sep 2023 17:26:11 +0100 Subject: [PATCH 043/216] i2c: xgene-slimpro: Migrate to use generic PCC shmem related macros commit 89a4ad1f437c049534891c3d83cd96d7c7debd2a upstream. 
Use the newly defined common and generic PCC shared memory region related macros in this driver to replace the locally defined ones. Reviewed-by: Andi Shyti Acked-by: Wolfram Sang Link: https://lore.kernel.org/r/20230927-pcc_defines-v2-2-0b8ffeaef2e5@arm.com Signed-off-by: Sudeep Holla Stable-dep-of: 7f9e19f207be ("mailbox: pcc: Check before sending MCTP PCC response ACK") Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4460b5236818627b9f655884f0da607e6183b74e) --- drivers/i2c/busses/i2c-xgene-slimpro.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index fbc1ffbd2fa7d..658396c9eeabf 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -91,14 +91,6 @@ #define SLIMPRO_IIC_MSG_DWORD_COUNT 3 -/* PCC related defines */ -#define PCC_SIGNATURE 0x50424300 -#define PCC_STS_CMD_COMPLETE BIT(0) -#define PCC_STS_SCI_DOORBELL BIT(1) -#define PCC_STS_ERR BIT(2) -#define PCC_STS_PLAT_NOTIFY BIT(3) -#define PCC_CMD_GENERATE_DB_INT BIT(15) - struct slimpro_i2c_dev { struct i2c_adapter adapter; struct device *dev; @@ -160,11 +152,11 @@ static void slimpro_i2c_pcc_rx_cb(struct mbox_client *cl, void *msg) /* Check if platform sends interrupt */ if (!xgene_word_tst_and_clr(&generic_comm_base->status, - PCC_STS_SCI_DOORBELL)) + PCC_STATUS_SCI_DOORBELL)) return; if (xgene_word_tst_and_clr(&generic_comm_base->status, - PCC_STS_CMD_COMPLETE)) { + PCC_STATUS_CMD_COMPLETE)) { msg = generic_comm_base + 1; /* Response message msg[1] contains the return value. 
*/ @@ -186,10 +178,10 @@ static void slimpro_i2c_pcc_tx_prepare(struct slimpro_i2c_dev *ctx, u32 *msg) cpu_to_le32(PCC_SIGNATURE | ctx->mbox_idx)); WRITE_ONCE(generic_comm_base->command, - cpu_to_le16(SLIMPRO_MSG_TYPE(msg[0]) | PCC_CMD_GENERATE_DB_INT)); + cpu_to_le16(SLIMPRO_MSG_TYPE(msg[0]) | PCC_CMD_GENERATE_DB_INTR)); status = le16_to_cpu(READ_ONCE(generic_comm_base->status)); - status &= ~PCC_STS_CMD_COMPLETE; + status &= ~PCC_STATUS_CMD_COMPLETE; WRITE_ONCE(generic_comm_base->status, cpu_to_le16(status)); /* Copy the message to the PCC comm space */ From f81ac615596800ac4df866e5d58ae9594c5db295 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 27 Sep 2023 17:26:10 +0100 Subject: [PATCH 044/216] ACPI: PCC: Add PCC shared memory region command and status bitfields [ Upstream commit 55d235ebb684b993b3247740c1c8e273f8af4a54 ] Define the common macros to use when referring to various bitfields in the PCC generic communications channel command and status fields. Currently different drivers that need to use these bitfields have defined these locally. This common macro is intended to consolidate and replace those. Cc: "Rafael J. 
Wysocki" Link: https://lore.kernel.org/r/20230927-pcc_defines-v2-1-0b8ffeaef2e5@arm.com Signed-off-by: Sudeep Holla Stable-dep-of: 7f9e19f207be ("mailbox: pcc: Check before sending MCTP PCC response ACK") Signed-off-by: Sasha Levin (cherry picked from commit d8c0f38208a41324f48349c0ebb5ea35cf2412d7) --- include/acpi/pcc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 73e806fe7ce70..9b373d172a776 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -18,7 +18,20 @@ struct pcc_mbox_chan { u16 min_turnaround_time; }; +/* Generic Communications Channel Shared Memory Region */ +#define PCC_SIGNATURE 0x50434300 +/* Generic Communications Channel Command Field */ +#define PCC_CMD_GENERATE_DB_INTR BIT(15) +/* Generic Communications Channel Status Field */ +#define PCC_STATUS_CMD_COMPLETE BIT(0) +#define PCC_STATUS_SCI_DOORBELL BIT(1) +#define PCC_STATUS_ERROR BIT(2) +#define PCC_STATUS_PLATFORM_NOTIFY BIT(3) +/* Initiator Responder Communications Channel Flags */ +#define PCC_CMD_COMPLETION_NOTIFY BIT(0) + #define MAX_PCC_SUBSPACES 256 + #ifdef CONFIG_PCC extern struct pcc_mbox_chan * pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id); From 0144515f0299d8dd125c82766b0e947153cd29ea Mon Sep 17 00:00:00 2001 From: Adam Young Date: Wed, 20 Nov 2024 14:02:14 -0500 Subject: [PATCH 045/216] mailbox: pcc: Check before sending MCTP PCC response ACK [ Upstream commit 7f9e19f207be0c534d517d65e01417ba968cdd34 ] Type 4 PCC channels have an option to send back a response to the platform when they are done processing the request. The flag to indicate whether or not to respond is inside the message body, and thus is not available to the pcc mailbox. If the flag is not set, still set command completion bit after processing message. In order to read the flag, this patch maps the shared buffer to virtual memory. 
To avoid duplication of mapping the shared buffer is then made available to be used by the driver that uses the mailbox. Signed-off-by: Adam Young Cc: Sudeep Holla Signed-off-by: Jassi Brar Signed-off-by: Sasha Levin (cherry picked from commit 3c9d3157f3cc7616637383f5e1f19abbc07bf9c2) --- drivers/mailbox/pcc.c | 61 +++++++++++++++++++++++++++++++++++++------ include/acpi/pcc.h | 7 +++++ 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 94885e411085a..82102a4c5d688 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -269,6 +269,35 @@ static bool pcc_mbox_cmd_complete_check(struct pcc_chan_info *pchan) return !!val; } +static void check_and_ack(struct pcc_chan_info *pchan, struct mbox_chan *chan) +{ + struct acpi_pcct_ext_pcc_shared_memory pcc_hdr; + + if (pchan->type != ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE) + return; + /* If the memory region has not been mapped, we cannot + * determine if we need to send the message, but we still + * need to set the cmd_update flag before returning. + */ + if (pchan->chan.shmem == NULL) { + pcc_chan_reg_read_modify_write(&pchan->cmd_update); + return; + } + memcpy_fromio(&pcc_hdr, pchan->chan.shmem, + sizeof(struct acpi_pcct_ext_pcc_shared_memory)); + /* + * The PCC slave subspace channel needs to set the command complete bit + * after processing message. If the PCC_ACK_FLAG is set, it should also + * ring the doorbell. + * + * The PCC master subspace channel clears chan_in_use to free channel. + */ + if (le32_to_cpup(&pcc_hdr.flags) & PCC_ACK_FLAG_MASK) + pcc_send_data(chan, NULL); + else + pcc_chan_reg_read_modify_write(&pchan->cmd_update); +} + /** * pcc_mbox_irq - PCC mailbox interrupt handler * @irq: interrupt number @@ -306,14 +335,7 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) mbox_chan_received_data(chan, NULL); - /* - * The PCC slave subspace channel needs to set the command complete bit - * and ring doorbell after processing message. 
- * - * The PCC master subspace channel clears chan_in_use to free channel. - */ - if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE) - pcc_send_data(chan, NULL); + check_and_ack(pchan, chan); pchan->chan_in_use = false; return IRQ_HANDLED; @@ -365,14 +387,37 @@ EXPORT_SYMBOL_GPL(pcc_mbox_request_channel); void pcc_mbox_free_channel(struct pcc_mbox_chan *pchan) { struct mbox_chan *chan = pchan->mchan; + struct pcc_chan_info *pchan_info; + struct pcc_mbox_chan *pcc_mbox_chan; if (!chan || !chan->cl) return; + pchan_info = chan->con_priv; + pcc_mbox_chan = &pchan_info->chan; + if (pcc_mbox_chan->shmem) { + iounmap(pcc_mbox_chan->shmem); + pcc_mbox_chan->shmem = NULL; + } mbox_free_channel(chan); } EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); +int pcc_mbox_ioremap(struct mbox_chan *chan) +{ + struct pcc_chan_info *pchan_info; + struct pcc_mbox_chan *pcc_mbox_chan; + + if (!chan || !chan->cl) + return -1; + pchan_info = chan->con_priv; + pcc_mbox_chan = &pchan_info->chan; + pcc_mbox_chan->shmem = ioremap(pcc_mbox_chan->shmem_base_addr, + pcc_mbox_chan->shmem_size); + return 0; +} +EXPORT_SYMBOL_GPL(pcc_mbox_ioremap); + /** * pcc_send_data - Called from Mailbox Controller code. Used * here only to ring the channel doorbell. 
The PCC client diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 9b373d172a776..699c1a37b8e78 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -12,6 +12,7 @@ struct pcc_mbox_chan { struct mbox_chan *mchan; u64 shmem_base_addr; + void __iomem *shmem; u64 shmem_size; u32 latency; u32 max_access_rate; @@ -31,11 +32,13 @@ struct pcc_mbox_chan { #define PCC_CMD_COMPLETION_NOTIFY BIT(0) #define MAX_PCC_SUBSPACES 256 +#define PCC_ACK_FLAG_MASK 0x1 #ifdef CONFIG_PCC extern struct pcc_mbox_chan * pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id); extern void pcc_mbox_free_channel(struct pcc_mbox_chan *chan); +extern int pcc_mbox_ioremap(struct mbox_chan *chan); #else static inline struct pcc_mbox_chan * pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) @@ -43,6 +46,10 @@ pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) return ERR_PTR(-ENODEV); } static inline void pcc_mbox_free_channel(struct pcc_mbox_chan *chan) { } +static inline int pcc_mbox_ioremap(struct mbox_chan *chan) +{ + return 0; +}; #endif #endif /* _PCC_H */ From 5b157ca99edfb1a0d13cc38e08b532cae1917372 Mon Sep 17 00:00:00 2001 From: Nikita Travkin Date: Thu, 7 Sep 2023 15:02:35 +0500 Subject: [PATCH 046/216] remoteproc: qcom: pas: Add sc7180 adsp [ Upstream commit 8de60bbab994bf8165d7d10e974872852da47aa7 ] sc7180 has a dedicated ADSP similar to the one found in sm8250. Add its compatible string to the driver, reusing the existing config, so the devices that use the adsp can probe it.
Signed-off-by: Nikita Travkin Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20230907-sc7180-adsp-rproc-v3-2-6515c3fbe0a3@trvn.ru Signed-off-by: Bjorn Andersson Stable-dep-of: 009e288c989b ("remoteproc: qcom: pas: enable SAR2130P audio DSP support") Signed-off-by: Sasha Levin (cherry picked from commit 25804f9b492b3007e08122d98b935aee4475f0e1) --- drivers/remoteproc/qcom_q6v5_pas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 6235721f2c1ae..fd66bb8b23f86 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -1163,6 +1163,7 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,qcs404-adsp-pas", .data = &adsp_resource_init }, { .compatible = "qcom,qcs404-cdsp-pas", .data = &cdsp_resource_init }, { .compatible = "qcom,qcs404-wcss-pas", .data = &wcss_resource_init }, + { .compatible = "qcom,sc7180-adsp-pas", .data = &sm8250_adsp_resource}, { .compatible = "qcom,sc7180-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sc7280-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sc8180x-adsp-pas", .data = &sm8150_adsp_resource}, From de4802444ab9cdd9424a1560c09cbe89e821ffbb Mon Sep 17 00:00:00 2001 From: Tengfei Fan Date: Mon, 5 Aug 2024 19:08:04 +0200 Subject: [PATCH 047/216] remoteproc: qcom: pas: Add support for SA8775p ADSP, CDSP and GPDSP [ Upstream commit 9091225ba28c0106d3cd041c7abf5551a94bb524 ] Add support for PIL loading on ADSP, CDSP0, CDSP1, GPDSP0 and GPDSP1 on SA8775p SoCs. 
Signed-off-by: Tengfei Fan Co-developed-by: Bartosz Golaszewski Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240805-topic-sa8775p-iot-remoteproc-v4-3-86affdc72c04@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 009e288c989b ("remoteproc: qcom: pas: enable SAR2130P audio DSP support") Signed-off-by: Sasha Levin (cherry picked from commit b506a0c41411683b74814bf5fef5c114184a0fdf) --- drivers/remoteproc/qcom_q6v5_pas.c | 92 ++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index fd66bb8b23f86..4a73723e375ae 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -786,6 +786,23 @@ static const struct adsp_data adsp_resource_init = { .ssctl_id = 0x14, }; +static const struct adsp_data sa8775p_adsp_resource = { + .crash_reason_smem = 423, + .firmware_name = "adsp.mbn", + .pas_id = 1, + .minidump_id = 5, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "lcx", + "lmx", + NULL + }, + .load_state = "adsp", + .ssr_name = "lpass", + .sysmon_name = "adsp", + .ssctl_id = 0x14, +}; + static const struct adsp_data sdm845_adsp_resource_init = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", @@ -885,6 +902,42 @@ static const struct adsp_data cdsp_resource_init = { .ssctl_id = 0x17, }; +static const struct adsp_data sa8775p_cdsp0_resource = { + .crash_reason_smem = 601, + .firmware_name = "cdsp0.mbn", + .pas_id = 18, + .minidump_id = 7, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "cx", + "mxc", + "nsp", + NULL + }, + .load_state = "cdsp", + .ssr_name = "cdsp", + .sysmon_name = "cdsp", + .ssctl_id = 0x17, +}; + +static const struct adsp_data sa8775p_cdsp1_resource = { + .crash_reason_smem = 633, + .firmware_name = "cdsp1.mbn", + .pas_id = 30, + .minidump_id = 20, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "cx", + "mxc", + "nsp", + NULL + }, + .load_state = "nsp", + .ssr_name = "cdsp1", 
+ .sysmon_name = "cdsp1", + .ssctl_id = 0x20, +}; + static const struct adsp_data sdm845_cdsp_resource_init = { .crash_reason_smem = 601, .firmware_name = "cdsp.mdt", @@ -987,6 +1040,40 @@ static const struct adsp_data sm8350_cdsp_resource = { .ssctl_id = 0x17, }; +static const struct adsp_data sa8775p_gpdsp0_resource = { + .crash_reason_smem = 640, + .firmware_name = "gpdsp0.mbn", + .pas_id = 39, + .minidump_id = 21, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "cx", + "mxc", + NULL + }, + .load_state = "gpdsp0", + .ssr_name = "gpdsp0", + .sysmon_name = "gpdsp0", + .ssctl_id = 0x21, +}; + +static const struct adsp_data sa8775p_gpdsp1_resource = { + .crash_reason_smem = 641, + .firmware_name = "gpdsp1.mbn", + .pas_id = 40, + .minidump_id = 22, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "cx", + "mxc", + NULL + }, + .load_state = "gpdsp1", + .ssr_name = "gpdsp1", + .sysmon_name = "gpdsp1", + .ssctl_id = 0x22, +}; + static const struct adsp_data mpss_resource_init = { .crash_reason_smem = 421, .firmware_name = "modem.mdt", @@ -1163,6 +1250,11 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,qcs404-adsp-pas", .data = &adsp_resource_init }, { .compatible = "qcom,qcs404-cdsp-pas", .data = &cdsp_resource_init }, { .compatible = "qcom,qcs404-wcss-pas", .data = &wcss_resource_init }, + { .compatible = "qcom,sa8775p-adsp-pas", .data = &sa8775p_adsp_resource}, + { .compatible = "qcom,sa8775p-cdsp0-pas", .data = &sa8775p_cdsp0_resource}, + { .compatible = "qcom,sa8775p-cdsp1-pas", .data = &sa8775p_cdsp1_resource}, + { .compatible = "qcom,sa8775p-gpdsp0-pas", .data = &sa8775p_gpdsp0_resource}, + { .compatible = "qcom,sa8775p-gpdsp1-pas", .data = &sa8775p_gpdsp1_resource}, { .compatible = "qcom,sc7180-adsp-pas", .data = &sm8250_adsp_resource}, { .compatible = "qcom,sc7180-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sc7280-mpss-pas", .data = &mpss_resource_init}, From dcdfcfe6fe71dcec0b3375babf282745a1804b9c Mon 
Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 27 Oct 2024 01:09:45 +0300 Subject: [PATCH 048/216] remoteproc: qcom: pas: enable SAR2130P audio DSP support [ Upstream commit 009e288c989b3fe548a45c82da407d7bd00418a9 ] Enable support for the Audio DSP on the Qualcomm SAR2130P platform, reusing the SM8350 resources. Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20241027-sar2130p-adsp-v1-3-bd204e39d24e@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin (cherry picked from commit 171c40531b046cf7e29365eca2156cc6df74027a) --- drivers/remoteproc/qcom_q6v5_pas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 4a73723e375ae..fd6bf9e77afcb 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -1255,6 +1255,7 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sa8775p-cdsp1-pas", .data = &sa8775p_cdsp1_resource}, { .compatible = "qcom,sa8775p-gpdsp0-pas", .data = &sa8775p_gpdsp0_resource}, { .compatible = "qcom,sa8775p-gpdsp1-pas", .data = &sa8775p_gpdsp1_resource}, + { .compatible = "qcom,sar2130p-adsp-pas", .data = &sm8350_adsp_resource}, { .compatible = "qcom,sc7180-adsp-pas", .data = &sm8250_adsp_resource}, { .compatible = "qcom,sc7180-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sc7280-mpss-pas", .data = &mpss_resource_init}, From 3e9d04f696e9388af576ec38e8ec11175bfa2ac5 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 18 Jul 2024 17:45:12 +0300 Subject: [PATCH 049/216] fs/ntfs3: Implement fallocate for compressed files [ Upstream commit 9a2d6a40b8a1a6fa62eaf47ceee10a5eef62284c ] Signed-off-by: Konstantin Komarov Stable-dep-of: e2705dd3d16d ("fs/ntfs3: Fix warning in ni_fiemap") Signed-off-by: Sasha Levin (cherry picked from commit 48ebb93f933d032e88bd28208e13049b574a90f2) --- fs/ntfs3/attrib.c | 25 +++++++++++++++---------- 
fs/ntfs3/inode.c | 3 ++- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index fc6cea60044ed..582628b9b7967 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -977,15 +977,17 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, goto out; /* Check for compressed frame. */ - err = attr_is_frame_compressed(ni, attr, vcn >> NTFS_LZNT_CUNIT, &hint); + err = attr_is_frame_compressed(ni, attr_b, vcn >> NTFS_LZNT_CUNIT, + &hint); if (err) goto out; if (hint) { /* if frame is compressed - don't touch it. */ *lcn = COMPRESSED_LCN; - *len = hint; - err = -EOPNOTSUPP; + /* length to the end of frame. */ + *len = NTFS_LZNT_CLUSTERS - (vcn & (NTFS_LZNT_CLUSTERS - 1)); + err = 0; goto out; } @@ -1028,16 +1030,16 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, /* Check if 'vcn' and 'vcn0' in different attribute segments. */ if (vcn < svcn || evcn1 <= vcn) { - /* Load attribute for truncated vcn. */ - attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, - &vcn, &mi); - if (!attr) { + struct ATTRIB *attr2; + /* Load runs for truncated vcn. */ + attr2 = ni_find_attr(ni, attr_b, &le_b, ATTR_DATA, NULL, + 0, &vcn, &mi); + if (!attr2) { err = -EINVAL; goto out; } - svcn = le64_to_cpu(attr->nres.svcn); - evcn1 = le64_to_cpu(attr->nres.evcn) + 1; - err = attr_load_runs(attr, ni, run, NULL); + evcn1 = le64_to_cpu(attr2->nres.evcn) + 1; + err = attr_load_runs(attr2, ni, run, NULL); if (err) goto out; } @@ -1530,6 +1532,9 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, /* * attr_is_frame_compressed - Used to detect compressed frame. + * + * attr - base (primary) attribute segment. 
+ * Only base segments contains valid 'attr->nres.c_unit' */ int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, CLST frame, CLST *clst_data) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 52b80fd159147..af7c0cbba74e3 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -604,7 +604,8 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, bytes = ((u64)len << cluster_bits) - off; - if (lcn == SPARSE_LCN) { + if (lcn >= sbi->used.bitmap.nbits) { + /* This case includes resident/compressed/sparse. */ if (!create) { if (bh->b_size > bytes) bh->b_size = bytes; From a92aeaa5749bf7ff362fd65ccb21c02807272de2 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 8 Oct 2024 10:48:15 +0300 Subject: [PATCH 050/216] fs/ntfs3: Fix warning in ni_fiemap [ Upstream commit e2705dd3d16d1000f1fd8193d82447065de8c899 ] Use local runs_tree instead of cached. This way excludes rw_semaphore lock. Reported-by: syzbot+1c25748a40fe79b8a119@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin (cherry picked from commit c2556801583cf88255b87cea59254c85aa28f084) --- fs/ntfs3/attrib.c | 9 ++-- fs/ntfs3/frecord.c | 103 +++++++-------------------------------------- fs/ntfs3/ntfs_fs.h | 3 +- 3 files changed, 21 insertions(+), 94 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 582628b9b7967..e25989dd2c6bb 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -978,7 +978,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, /* Check for compressed frame. */ err = attr_is_frame_compressed(ni, attr_b, vcn >> NTFS_LZNT_CUNIT, - &hint); + &hint, run); if (err) goto out; @@ -1534,16 +1534,16 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, * attr_is_frame_compressed - Used to detect compressed frame. * * attr - base (primary) attribute segment. + * run - run to use, usually == &ni->file.run. 
* Only base segments contains valid 'attr->nres.c_unit' */ int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, - CLST frame, CLST *clst_data) + CLST frame, CLST *clst_data, struct runs_tree *run) { int err; u32 clst_frame; CLST clen, lcn, vcn, alen, slen, vcn_next; size_t idx; - struct runs_tree *run; *clst_data = 0; @@ -1555,7 +1555,6 @@ int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, clst_frame = 1u << attr->nres.c_unit; vcn = frame * clst_frame; - run = &ni->file.run; if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { err = attr_load_runs_vcn(ni, attr->type, attr_name(attr), @@ -1691,7 +1690,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, if (err) goto out; - err = attr_is_frame_compressed(ni, attr_b, frame, &clst_data); + err = attr_is_frame_compressed(ni, attr_b, frame, &clst_data, run); if (err) goto out; diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 12e03feb3074a..3c876c468c2c4 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -1900,46 +1900,6 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, return REPARSE_LINK; } -/* - * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent - * but it uses 'fe_k' instead of fieinfo->fi_extents_start - */ -static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo, - struct fiemap_extent *fe_k, u64 logical, - u64 phys, u64 len, u32 flags) -{ - struct fiemap_extent extent; - - /* only count the extents */ - if (fieinfo->fi_extents_max == 0) { - fieinfo->fi_extents_mapped++; - return (flags & FIEMAP_EXTENT_LAST) ? 
1 : 0; - } - - if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) - return 1; - - if (flags & FIEMAP_EXTENT_DELALLOC) - flags |= FIEMAP_EXTENT_UNKNOWN; - if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED) - flags |= FIEMAP_EXTENT_ENCODED; - if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE)) - flags |= FIEMAP_EXTENT_NOT_ALIGNED; - - memset(&extent, 0, sizeof(extent)); - extent.fe_logical = logical; - extent.fe_physical = phys; - extent.fe_length = len; - extent.fe_flags = flags; - - memcpy(fe_k + fieinfo->fi_extents_mapped, &extent, sizeof(extent)); - - fieinfo->fi_extents_mapped++; - if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) - return 1; - return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; -} - /* * ni_fiemap - Helper for file_fiemap(). * @@ -1950,11 +1910,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, __u64 vbo, __u64 len) { int err = 0; - struct fiemap_extent *fe_k = NULL; struct ntfs_sb_info *sbi = ni->mi.sbi; u8 cluster_bits = sbi->cluster_bits; - struct runs_tree *run; - struct rw_semaphore *run_lock; + struct runs_tree run; struct ATTRIB *attr; CLST vcn = vbo >> cluster_bits; CLST lcn, clen; @@ -1965,13 +1923,11 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, u32 flags; bool ok; + run_init(&run); if (S_ISDIR(ni->vfs_inode.i_mode)) { - run = &ni->dir.alloc_run; attr = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, I30_NAME, ARRAY_SIZE(I30_NAME), NULL, NULL); - run_lock = &ni->dir.run_lock; } else { - run = &ni->file.run; attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL); if (!attr) { @@ -1986,7 +1942,6 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, "fiemap is not supported for compressed file (cp -r)"); goto out; } - run_lock = &ni->file.run_lock; } if (!attr || !attr->non_res) { @@ -1998,51 +1953,33 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, goto out; } - /* - * To avoid lock problems replace pointer to 
user memory by pointer to kernel memory. - */ - fe_k = kmalloc_array(fieinfo->fi_extents_max, - sizeof(struct fiemap_extent), - GFP_NOFS | __GFP_ZERO); - if (!fe_k) { - err = -ENOMEM; - goto out; - } - end = vbo + len; alloc_size = le64_to_cpu(attr->nres.alloc_size); if (end > alloc_size) end = alloc_size; - down_read(run_lock); while (vbo < end) { if (idx == -1) { - ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); + ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); } else { CLST vcn_next = vcn; - ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && + ok = run_get_entry(&run, ++idx, &vcn, &lcn, &clen) && vcn == vcn_next; if (!ok) vcn = vcn_next; } if (!ok) { - up_read(run_lock); - down_write(run_lock); - err = attr_load_runs_vcn(ni, attr->type, attr_name(attr), - attr->name_len, run, vcn); - - up_write(run_lock); - down_read(run_lock); + attr->name_len, &run, vcn); if (err) break; - ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); + ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); if (!ok) { err = -EINVAL; @@ -2067,8 +2004,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, } else if (is_attr_compressed(attr)) { CLST clst_data; - err = attr_is_frame_compressed( - ni, attr, vcn >> attr->nres.c_unit, &clst_data); + err = attr_is_frame_compressed(ni, attr, + vcn >> attr->nres.c_unit, + &clst_data, &run); if (err) break; if (clst_data < NTFS_LZNT_CLUSTERS) @@ -2097,8 +2035,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, if (vbo + dlen >= end) flags |= FIEMAP_EXTENT_LAST; - err = fiemap_fill_next_extent_k(fieinfo, fe_k, vbo, lbo, - dlen, flags); + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, + flags); if (err < 0) break; @@ -2119,8 +2057,7 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, if (vbo + bytes >= end) flags |= FIEMAP_EXTENT_LAST; - err = fiemap_fill_next_extent_k(fieinfo, fe_k, vbo, lbo, bytes, - flags); + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, 
bytes, flags); if (err < 0) break; if (err == 1) { @@ -2131,19 +2068,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, vbo += bytes; } - up_read(run_lock); - - /* - * Copy to user memory out of lock - */ - if (copy_to_user(fieinfo->fi_extents_start, fe_k, - fieinfo->fi_extents_max * - sizeof(struct fiemap_extent))) { - err = -EFAULT; - } - out: - kfree(fe_k); + run_close(&run); return err; } @@ -2674,7 +2600,8 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, down_write(&ni->file.run_lock); run_truncate_around(run, le64_to_cpu(attr->nres.svcn)); frame = frame_vbo >> (cluster_bits + NTFS_LZNT_CUNIT); - err = attr_is_frame_compressed(ni, attr, frame, &clst_data); + err = attr_is_frame_compressed(ni, attr, frame, &clst_data, + run); up_write(&ni->file.run_lock); if (err) goto out1; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index cfe9d3bf07f91..c98e6868bfbad 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -446,7 +446,8 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, struct runs_tree *run, u64 frame, u64 frames, u8 frame_bits, u32 *ondisk_size, u64 *vbo_data); int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, - CLST frame, CLST *clst_data); + CLST frame, CLST *clst_data, + struct runs_tree *run); int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, u64 new_valid); int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes); From 54a76f3f50515df9950aa995a6a577f85ede413d Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Tue, 17 Oct 2023 22:59:01 +0300 Subject: [PATCH 051/216] usb: chipidea: add CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS flag [ Upstream commit 2978cc1f285390c1bd4d9bfc665747adc6e4b19c ] Adding CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS flag to modify the vbus_active parameter to active in case the ChipIdea USB IP role is device-only and there is no otgsc register. 
Signed-off-by: Tomer Maimon Acked-by: Peter Chen Link: https://lore.kernel.org/r/20231017195903.1665260-2-tmaimon77@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: ec841b8d73cf ("usb: chipidea: add CI_HDRC_HAS_SHORT_PKT_LIMIT flag") Signed-off-by: Sasha Levin (cherry picked from commit c39df6d3af2d80968a5e9fb7c5aaccb6b4011ad8) --- drivers/usb/chipidea/otg.c | 5 ++++- include/linux/usb/chipidea.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index f5490f2a5b6bc..647e98f4e3511 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -130,8 +130,11 @@ enum ci_role ci_otg_role(struct ci_hdrc *ci) void ci_handle_vbus_change(struct ci_hdrc *ci) { - if (!ci->is_otg) + if (!ci->is_otg) { + if (ci->platdata->flags & CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS) + usb_gadget_vbus_connect(&ci->gadget); return; + } if (hw_read_otgsc(ci, OTGSC_BSV) && !ci->vbus_active) usb_gadget_vbus_connect(&ci->gadget); diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 0b4f2d5faa080..5a7f96684ea22 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -64,6 +64,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_PMQOS BIT(15) #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) #define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) +#define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 From d5fe081dc1d8bef280730dfad2d4e823d7f292e6 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 23 Sep 2024 16:12:01 +0800 Subject: [PATCH 052/216] usb: chipidea: add CI_HDRC_HAS_SHORT_PKT_LIMIT flag [ Upstream commit ec841b8d73cff37f8960e209017efe1eb2fb21f2 ] Currently, the imx device controller has the limitations below: 1. can't generate a short packet interrupt if IOC is not set in the dTD.
So if one request spans more than one dTD and only the last dTD sets IOC, the usb request will stay pending if no more data comes. 2. the controller can't accurately deliver data to different usb requests in some cases due to short packets. For example: if one usb request spans 3 dTDs and the controller receives a short packet, the next packet will go to the 2nd dTD of the current request rather than the first dTD of the next request. 3. can't build a bus packet using multiple dTDs. For example: if the controller needs to send one packet of 512 bytes using dTD1 (200 bytes) + dTD2 (312 bytes), the host side will actually see a 200-byte short packet. Based on these limits, add CI_HDRC_HAS_SHORT_PKT_LIMIT flag and use it on imx platforms. Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240923081203.2851768-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (cherry picked from commit 7a2020e83b6a54783163dc7fc46ded3f84bf2644) --- drivers/usb/chipidea/ci.h | 1 + drivers/usb/chipidea/ci_hdrc_imx.c | 1 + drivers/usb/chipidea/core.c | 2 ++ include/linux/usb/chipidea.h | 1 + 4 files changed, 5 insertions(+) diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index 2a38e1eb65466..e4b003d060c26 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -260,6 +260,7 @@ struct ci_hdrc { bool b_sess_valid_event; bool imx28_write_fix; bool has_portsc_pec_bug; + bool has_short_pkt_limit; bool supports_runtime_pm; bool in_lpm; bool wakeup_int; diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index e28bb2f2612dc..477af457c1a1f 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -334,6 +334,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) struct ci_hdrc_platform_data pdata = { .name = dev_name(&pdev->dev), .capoffset = DEF_CAPOFFSET, + .flags = CI_HDRC_HAS_SHORT_PKT_LIMIT, .notify_event = ci_hdrc_imx_notify_event, }; int ret;
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index ca71df4f32e4c..c161a4ee52906 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -1076,6 +1076,8 @@ static int ci_hdrc_probe(struct platform_device *pdev) CI_HDRC_SUPPORTS_RUNTIME_PM); ci->has_portsc_pec_bug = !!(ci->platdata->flags & CI_HDRC_HAS_PORTSC_PEC_MISSED); + ci->has_short_pkt_limit = !!(ci->platdata->flags & + CI_HDRC_HAS_SHORT_PKT_LIMIT); platform_set_drvdata(pdev, ci); ret = hw_device_init(ci, base); diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 5a7f96684ea22..ebdfef124b2bc 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -65,6 +65,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) #define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) #define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) +#define CI_HDRC_HAS_SHORT_PKT_LIMIT BIT(19) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 From 762557a20cd43368dbb968b480b96598688d70fc Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 23 Sep 2024 16:12:02 +0800 Subject: [PATCH 053/216] usb: chipidea: udc: limit usb request length to max 16KB [ Upstream commit ca8d18aa7b0f22d66a3ca9a90d8f73431b8eca89 ] To let the device controller work properly with the short packet limitations, one usb request should correspond to only one dTD. Then every dTD will set IOC. In theory, each dTD supports up to 20KB of data transfer if the offset is 0. Because we cannot predetermine the offset, this limits the usb request length to a maximum of 16KB. This should be fine since most users transfer data based on this size policy.
Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240923081203.2851768-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (cherry picked from commit f7d548a62f082261875958e5479d79ca5a705318) --- drivers/usb/chipidea/ci.h | 1 + drivers/usb/chipidea/udc.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index e4b003d060c26..97437de52ef68 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -25,6 +25,7 @@ #define TD_PAGE_COUNT 5 #define CI_HDRC_PAGE_SIZE 4096ul /* page size for TD's */ #define ENDPT_MAX 32 +#define CI_MAX_REQ_SIZE (4 * CI_HDRC_PAGE_SIZE) #define CI_MAX_BUF_SIZE (TD_PAGE_COUNT * CI_HDRC_PAGE_SIZE) /****************************************************************************** diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 9f7d003e467b5..f2ae5f4c58283 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -959,6 +959,12 @@ static int _ep_queue(struct usb_ep *ep, struct usb_request *req, return -EMSGSIZE; } + if (ci->has_short_pkt_limit && + hwreq->req.length > CI_MAX_REQ_SIZE) { + dev_err(hwep->ci->dev, "request length too big (max 16KB)\n"); + return -EMSGSIZE; + } + /* first nuke then test link, e.g. previous status has not sent */ if (!list_empty(&hwreq->queue)) { dev_err(hwep->ci->dev, "request already in queue\n"); From 77ea01804f8a7ad86f5b49a6d7b4cfe5eb9263d7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 18 Feb 2024 17:27:28 +0000 Subject: [PATCH 054/216] iio: adc: ad7192: Convert from of specific to fwnode property handling [ Upstream commit c3708c829a0662af429897a90aed46b70f14a50b ] Enables use of with other firmwware types. Removes a case of device tree specific handlers that might get copied into new drivers. 
Cc: Alisa-Dariana Roman Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240218172731.1023367-6-jic23@kernel.org Signed-off-by: Jonathan Cameron Stable-dep-of: b7f99fa1b64a ("iio: adc: ad7192: properly check spi_get_device_match_data()") Signed-off-by: Sasha Levin (cherry picked from commit 843b5d1602d6f2d0b0af2044f03c177b02332395) --- drivers/iio/adc/ad7192.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index b64fd365f83fb..ecaf87af539b0 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -16,7 +16,9 @@ #include #include #include -#include +#include +#include +#include #include #include @@ -360,19 +362,19 @@ static inline bool ad7192_valid_external_frequency(u32 freq) freq <= AD7192_EXT_FREQ_MHZ_MAX); } -static int ad7192_of_clock_select(struct ad7192_state *st) +static int ad7192_clock_select(struct ad7192_state *st) { - struct device_node *np = st->sd.spi->dev.of_node; + struct device *dev = &st->sd.spi->dev; unsigned int clock_sel; clock_sel = AD7192_CLK_INT; /* use internal clock */ if (!st->mclk) { - if (of_property_read_bool(np, "adi,int-clock-output-enable")) + if (device_property_read_bool(dev, "adi,int-clock-output-enable")) clock_sel = AD7192_CLK_INT_CO; } else { - if (of_property_read_bool(np, "adi,clock-xtal")) + if (device_property_read_bool(dev, "adi,clock-xtal")) clock_sel = AD7192_CLK_EXT_MCLK1_2; else clock_sel = AD7192_CLK_EXT_MCLK2; @@ -381,7 +383,7 @@ static int ad7192_of_clock_select(struct ad7192_state *st) return clock_sel; } -static int ad7192_setup(struct iio_dev *indio_dev, struct device_node *np) +static int ad7192_setup(struct iio_dev *indio_dev, struct device *dev) { struct ad7192_state *st = iio_priv(indio_dev); bool rej60_en, refin2_en; @@ -403,7 +405,7 @@ static int ad7192_setup(struct iio_dev *indio_dev, struct device_node *np) id &= AD7192_ID_MASK; if (id != st->chip_info->chip_id) - 
dev_warn(&st->sd.spi->dev, "device ID query failed (0x%X != 0x%X)\n", + dev_warn(dev, "device ID query failed (0x%X != 0x%X)\n", id, st->chip_info->chip_id); st->mode = AD7192_MODE_SEL(AD7192_MODE_IDLE) | @@ -412,31 +414,31 @@ static int ad7192_setup(struct iio_dev *indio_dev, struct device_node *np) st->conf = AD7192_CONF_GAIN(0); - rej60_en = of_property_read_bool(np, "adi,rejection-60-Hz-enable"); + rej60_en = device_property_read_bool(dev, "adi,rejection-60-Hz-enable"); if (rej60_en) st->mode |= AD7192_MODE_REJ60; - refin2_en = of_property_read_bool(np, "adi,refin2-pins-enable"); + refin2_en = device_property_read_bool(dev, "adi,refin2-pins-enable"); if (refin2_en && st->chip_info->chip_id != CHIPID_AD7195) st->conf |= AD7192_CONF_REFSEL; st->conf &= ~AD7192_CONF_CHOP; st->f_order = AD7192_NO_SYNC_FILTER; - buf_en = of_property_read_bool(np, "adi,buffer-enable"); + buf_en = device_property_read_bool(dev, "adi,buffer-enable"); if (buf_en) st->conf |= AD7192_CONF_BUF; - bipolar = of_property_read_bool(np, "bipolar"); + bipolar = device_property_read_bool(dev, "bipolar"); if (!bipolar) st->conf |= AD7192_CONF_UNIPOLAR; - burnout_curr_en = of_property_read_bool(np, - "adi,burnout-currents-enable"); + burnout_curr_en = device_property_read_bool(dev, + "adi,burnout-currents-enable"); if (burnout_curr_en && buf_en) { st->conf |= AD7192_CONF_BURN; } else if (burnout_curr_en) { - dev_warn(&st->sd.spi->dev, + dev_warn(dev, "Can't enable burnout currents: see CHOP or buffer\n"); } @@ -1036,9 +1038,7 @@ static int ad7192_probe(struct spi_device *spi) } st->int_vref_mv = ret / 1000; - st->chip_info = of_device_get_match_data(&spi->dev); - if (!st->chip_info) - st->chip_info = (void *)spi_get_device_id(spi)->driver_data; + st->chip_info = spi_get_device_match_data(spi); indio_dev->name = st->chip_info->name; indio_dev->modes = INDIO_DIRECT_MODE; @@ -1065,7 +1065,7 @@ static int ad7192_probe(struct spi_device *spi) if (IS_ERR(st->mclk)) return PTR_ERR(st->mclk); - 
st->clock_sel = ad7192_of_clock_select(st); + st->clock_sel = ad7192_clock_select(st); if (st->clock_sel == AD7192_CLK_EXT_MCLK1_2 || st->clock_sel == AD7192_CLK_EXT_MCLK2) { @@ -1077,7 +1077,7 @@ static int ad7192_probe(struct spi_device *spi) } } - ret = ad7192_setup(indio_dev, spi->dev.of_node); + ret = ad7192_setup(indio_dev, &spi->dev); if (ret) return ret; From 9f2030a2a163d0d7c6f1b50f9a68749b112e4ce2 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Mon, 14 Oct 2024 17:01:21 +0200 Subject: [PATCH 055/216] iio: adc: ad7192: properly check spi_get_device_match_data() [ Upstream commit b7f99fa1b64af2f696b13cec581cb4cd7d3982b8 ] spi_get_device_match_data() can return a NULL pointer. Hence, let's check for it. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20241014-fix-error-check-v1-1-089e1003d12f@analog.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin (cherry picked from commit c47940e84398c6094b8b3abdd16282482637c883) --- drivers/iio/adc/ad7192.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index ecaf87af539b0..fa6810aa6a4a7 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -1039,6 +1039,9 @@ static int ad7192_probe(struct spi_device *spi) st->int_vref_mv = ret / 1000; st->chip_info = spi_get_device_match_data(spi); + if (!st->chip_info) + return -ENODEV; + indio_dev->name = st->chip_info->name; indio_dev->modes = INDIO_DIRECT_MODE; From 39682d03cb56335740265800c2a5ef547efff582 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 11 Apr 2024 07:49:53 +0300 Subject: [PATCH 056/216] usb: typec: ucsi: add callback for connector status updates [ Upstream commit 24bce22d09ec8e67022aab9a888acb56fb7a996a ] Allow UCSI glue driver to perform additional work to update connector status. For example, it might check the cable orientation. This call is performed after reading new connector status, so the platform driver can peek at new connection status bits. 
The callback is called both when registering the port and when the connector change event is being handled. Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240411-ucsi-orient-aware-v2-1-d4b1cb22a33f@linaro.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: de9df030ccb5 ("usb: typec: ucsi: glink: be more precise on orientation-aware ports") Signed-off-by: Sasha Levin (cherry picked from commit 8dd7fc5e409bea3d74c0c1fa78f19a5f92e02a1a) --- drivers/usb/typec/ucsi/ucsi.c | 6 ++++++ drivers/usb/typec/ucsi/ucsi.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f6fb5575d4f0a..3f7039a711c7e 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -903,6 +903,9 @@ static void ucsi_handle_connector_change(struct work_struct *work) trace_ucsi_connector_change(con->num, &con->status); + if (ucsi->ops->connector_status) + ucsi->ops->connector_status(con); + role = !!(con->status.flags & UCSI_CONSTAT_PWR_DIR); if (con->status.change & UCSI_CONSTAT_POWER_DIR_CHANGE) { @@ -1322,6 +1325,9 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con) } ret = 0; /* ucsi_send_command() returns length on success */ + if (ucsi->ops->connector_status) + ucsi->ops->connector_status(con); + switch (UCSI_CONSTAT_PARTNER_TYPE(con->status.flags)) { case UCSI_CONSTAT_PARTNER_TYPE_UFP: case UCSI_CONSTAT_PARTNER_TYPE_CABLE_AND_UFP: diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 42c60eba5fb6e..3d23b52cf5a96 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -15,6 +15,7 @@ struct ucsi; struct ucsi_altmode; +struct ucsi_connector; struct dentry; /* UCSI offsets (Bytes) */ @@ -52,6 +53,7 @@ struct dentry; * @sync_write: Blocking write operation * @async_write: Non-blocking write operation * @update_altmodes: Squashes duplicate DP altmodes + * @connector_status: Updates 
connector status, called holding connector lock * * Read and write routines for UCSI interface. @sync_write must wait for the * Command Completion Event from the PPM before returning, and @async_write must @@ -66,6 +68,7 @@ struct ucsi_operations { const void *val, size_t val_len); bool (*update_altmodes)(struct ucsi *ucsi, struct ucsi_altmode *orig, struct ucsi_altmode *updated); + void (*connector_status)(struct ucsi_connector *con); }; struct ucsi *ucsi_create(struct device *dev, const struct ucsi_operations *ops); From 6d5a681c8ceddd1e22046d2bf8f0724685b42718 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 11 Apr 2024 07:49:54 +0300 Subject: [PATCH 057/216] usb: typec: ucsi: glink: move GPIO reading into connector_status callback [ Upstream commit 76716fd5bf09725c2c6825264147f16c21e56853 ] To simplify the platform code move Type-C orientation handling into the connector_status callback. As it is called both during connector registration and on connector change events, duplicated code from pmic_glink_ucsi_register() can be dropped. Also this moves operations that can sleep into a worker thread, removing the only sleeping operation from pmic_glink_ucsi_notify(). 
Tested-by: Krishna Kurapati Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240411-ucsi-orient-aware-v2-2-d4b1cb22a33f@linaro.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: de9df030ccb5 ("usb: typec: ucsi: glink: be more precise on orientation-aware ports") Signed-off-by: Sasha Levin (cherry picked from commit fd662c37a1087a2631cd2544138650b153e65f90) --- drivers/usb/typec/ucsi/ucsi_glink.c | 48 ++++++++++++----------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 94f2df02f06ee..4c9352cdd6415 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -186,10 +186,28 @@ static int pmic_glink_ucsi_sync_write(struct ucsi *__ucsi, unsigned int offset, return ret; } +static void pmic_glink_ucsi_connector_status(struct ucsi_connector *con) +{ + struct pmic_glink_ucsi *ucsi = ucsi_get_drvdata(con->ucsi); + int orientation; + + if (con->num >= PMIC_GLINK_MAX_PORTS || + !ucsi->port_orientation[con->num - 1]) + return; + + orientation = gpiod_get_value(ucsi->port_orientation[con->num - 1]); + if (orientation >= 0) { + typec_switch_set(ucsi->port_switch[con->num - 1], + orientation ? 
TYPEC_ORIENTATION_REVERSE + : TYPEC_ORIENTATION_NORMAL); + } +} + static const struct ucsi_operations pmic_glink_ucsi_ops = { .read = pmic_glink_ucsi_read, .sync_write = pmic_glink_ucsi_sync_write, - .async_write = pmic_glink_ucsi_async_write + .async_write = pmic_glink_ucsi_async_write, + .connector_status = pmic_glink_ucsi_connector_status, }; static void pmic_glink_ucsi_read_ack(struct pmic_glink_ucsi *ucsi, const void *data, int len) @@ -228,20 +246,8 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) } con_num = UCSI_CCI_CONNECTOR(cci); - if (con_num) { - if (con_num <= PMIC_GLINK_MAX_PORTS && - ucsi->port_orientation[con_num - 1]) { - int orientation = gpiod_get_value(ucsi->port_orientation[con_num - 1]); - - if (orientation >= 0) { - typec_switch_set(ucsi->port_switch[con_num - 1], - orientation ? TYPEC_ORIENTATION_REVERSE - : TYPEC_ORIENTATION_NORMAL); - } - } - + if (con_num) ucsi_connector_change(ucsi->ucsi, con_num); - } if (ucsi->sync_pending && (cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE))) { @@ -252,20 +258,6 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) static void pmic_glink_ucsi_register(struct work_struct *work) { struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work); - int orientation; - int i; - - for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) { - if (!ucsi->port_orientation[i]) - continue; - orientation = gpiod_get_value(ucsi->port_orientation[i]); - - if (orientation >= 0) { - typec_switch_set(ucsi->port_switch[i], - orientation ? 
TYPEC_ORIENTATION_REVERSE - : TYPEC_ORIENTATION_NORMAL); - } - } ucsi_register(ucsi->ucsi); } From 4d611cd5d08dc24a02a29acec3f44e4f5257d4a3 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 11 Apr 2024 07:49:56 +0300 Subject: [PATCH 058/216] usb: typec: ucsi: add update_connector callback [ Upstream commit 62866465196228917f233aea68de73be6cdb9fae ] Add a callback to allow glue drivers to update the connector before registering corresponding power supply and Type-C port. In particular this is useful if glue drivers want to touch the connector's Type-C capabilities structure. Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240411-ucsi-orient-aware-v2-4-d4b1cb22a33f@linaro.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: de9df030ccb5 ("usb: typec: ucsi: glink: be more precise on orientation-aware ports") Signed-off-by: Sasha Levin (cherry picked from commit 01059e0b5cc00f128f5b37e51ce60e21727f7773) --- drivers/usb/typec/ucsi/ucsi.c | 3 +++ drivers/usb/typec/ucsi/ucsi.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 3f7039a711c7e..d6a3fd00c3a5c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1261,6 +1261,9 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con) cap->driver_data = con; cap->ops = &ucsi_ops; + if (ucsi->ops->update_connector) + ucsi->ops->update_connector(con); + ret = ucsi_register_port_psy(con); if (ret) goto out; diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 3d23b52cf5a96..921ef0e115cff 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -53,6 +53,7 @@ struct dentry; * @sync_write: Blocking write operation * @async_write: Non-blocking write operation * @update_altmodes: Squashes duplicate DP altmodes + * @update_connector: Update connector capabilities before registering * @connector_status: 
Updates connector status, called holding connector lock * * Read and write routines for UCSI interface. @sync_write must wait for the @@ -68,6 +69,7 @@ struct ucsi_operations { const void *val, size_t val_len); bool (*update_altmodes)(struct ucsi *ucsi, struct ucsi_altmode *orig, struct ucsi_altmode *updated); + void (*update_connector)(struct ucsi_connector *con); void (*connector_status)(struct ucsi_connector *con); }; From 0a4b686ae31cb8348a6f9e3cd8024c8c78282dbf Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 11 Apr 2024 07:49:57 +0300 Subject: [PATCH 059/216] usb: typec: ucsi: glink: set orientation aware if supported [ Upstream commit 3d1b6c9d47707d6a0f80bb5db6473b1f107b5baf ] If the PMIC-GLINK device has orientation GPIOs declared, then it will report connection orientation. In this case set the flag to mark registered ports as orientation-aware. Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240411-ucsi-orient-aware-v2-5-d4b1cb22a33f@linaro.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: de9df030ccb5 ("usb: typec: ucsi: glink: be more precise on orientation-aware ports") Signed-off-by: Sasha Levin (cherry picked from commit 7723988b012783dd6dc1a628cf8f4ace884a7743) --- drivers/usb/typec/ucsi/ucsi_glink.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 4c9352cdd6415..f6c3af5846e67 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -186,6 +186,17 @@ static int pmic_glink_ucsi_sync_write(struct ucsi *__ucsi, unsigned int offset, return ret; } +static void pmic_glink_ucsi_update_connector(struct ucsi_connector *con) +{ + struct pmic_glink_ucsi *ucsi = ucsi_get_drvdata(con->ucsi); + int i; + + for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) { + if (ucsi->port_orientation[i]) + con->typec_cap.orientation_aware = true; + } +} + static void 
pmic_glink_ucsi_connector_status(struct ucsi_connector *con) { struct pmic_glink_ucsi *ucsi = ucsi_get_drvdata(con->ucsi); @@ -207,6 +218,7 @@ static const struct ucsi_operations pmic_glink_ucsi_ops = { .read = pmic_glink_ucsi_read, .sync_write = pmic_glink_ucsi_sync_write, .async_write = pmic_glink_ucsi_async_write, + .update_connector = pmic_glink_ucsi_update_connector, .connector_status = pmic_glink_ucsi_connector_status, }; From 2c4ceffb52b7d56cd2c7b9dec7c3f938a4e650d3 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 9 Nov 2024 02:04:15 +0200 Subject: [PATCH 060/216] usb: typec: ucsi: glink: be more precise on orientation-aware ports [ Upstream commit de9df030ccb5d3e31ee0c715d74cd77c619748f8 ] Instead of checking if any of the USB-C ports have orientation GPIO and thus is orientation-aware, check for the GPIO for the port being registered. There are no boards that are affected by this change at this moment, so the patch is not marked as a fix, but it might affect other boards in future. 
Reviewed-by: Abel Vesa Reviewed-by: Neil Armstrong Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241109-ucsi-glue-fixes-v2-2-8b21ff4f9fbe@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (cherry picked from commit 2c276bef82736dc09b32be687bdca7382fc04391) --- drivers/usb/typec/ucsi/ucsi_glink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index f6c3af5846e67..f0b4d0a4bb198 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -189,12 +189,12 @@ static int pmic_glink_ucsi_sync_write(struct ucsi *__ucsi, unsigned int offset, static void pmic_glink_ucsi_update_connector(struct ucsi_connector *con) { struct pmic_glink_ucsi *ucsi = ucsi_get_drvdata(con->ucsi); - int i; - for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) { - if (ucsi->port_orientation[i]) - con->typec_cap.orientation_aware = true; - } + if (con->num > PMIC_GLINK_MAX_PORTS || + !ucsi->port_orientation[con->num - 1]) + return; + + con->typec_cap.orientation_aware = true; } static void pmic_glink_ucsi_connector_status(struct ucsi_connector *con) From f346b65c162a494ccba1989db61a2387718bc193 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 16 Oct 2024 08:33:16 +0530 Subject: [PATCH 061/216] nvme: use helper nvme_ctrl_state in nvme_keep_alive_finish function [ Upstream commit 599d9f3a10eec69ef28a90161763e4bd7c9c02bf ] We no more need acquiring ctrl->lock before accessing the NVMe controller state and instead we can now use the helper nvme_ctrl_state. So replace the use of ctrl->lock from nvme_keep_alive_finish function with nvme_ctrl_state call. 
Reviewed-by: Christoph Hellwig Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch Stable-dep-of: 84488282166d ("Revert "nvme: make keep-alive synchronous operation"") Signed-off-by: Sasha Levin (cherry picked from commit 801acf741c879a09f05df9b49f78142407d630c2) --- drivers/nvme/host/core.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5b6a6bd4e6e80..ae494c799fc51 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1181,10 +1181,9 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) static void nvme_keep_alive_finish(struct request *rq, blk_status_t status, struct nvme_ctrl *ctrl) { - unsigned long flags; - bool startka = false; unsigned long rtt = jiffies - (rq->deadline - rq->timeout); unsigned long delay = nvme_keep_alive_work_period(ctrl); + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); /* * Subtract off the keepalive RTT so nvme_keep_alive_work runs @@ -1207,12 +1206,7 @@ static void nvme_keep_alive_finish(struct request *rq, ctrl->ka_last_check_time = jiffies; ctrl->comp_seen = false; - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->state == NVME_CTRL_LIVE || - ctrl->state == NVME_CTRL_CONNECTING) - startka = true; - spin_unlock_irqrestore(&ctrl->lock, flags); - if (startka) + if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING) queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); } From 1b31d1e3d12dc821bec40624e60b82074ba466a9 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 5 Nov 2024 11:42:08 +0530 Subject: [PATCH 062/216] Revert "nvme: make keep-alive synchronous operation" [ Upstream commit 84488282166de6b6760ada8030e87aaa08bce3aa ] This reverts commit d06923670b5a5f609603d4a9fee4dec02d38de9c. 
It was realized that the fix implemented to contain the race condition among the keep alive task and the fabric shutdown code path in the commit d06923670b5a ("nvme: make keep-alive synchronous operation") is not optimal. The reason is that keep-alive runs under the workqueue and making it synchronous would waste a workqueue context. Furthermore, we later found that the above race condition is a regression caused due to the changes implemented in commit a54a93d0e359 ("nvme: move stopping keep-alive into nvme_uninit_ctrl()"). So we decided to revert the commit d06923670b5a ("nvme: make keep-alive synchronous operation") and then fix the regression. Link: https://lore.kernel.org/all/196f4013-3bbf-43ff-98b4-9cb2a96c20c2@grimberg.me/ Reviewed-by: Ming Lei Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch Signed-off-by: Sasha Levin (cherry picked from commit ddcc7d71be31044a97bfa67855d9043de188df13) --- drivers/nvme/host/core.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ae494c799fc51..4aad16390d479 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1178,9 +1178,10 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) nvme_keep_alive_work_period(ctrl)); } -static void nvme_keep_alive_finish(struct request *rq, - blk_status_t status, struct nvme_ctrl *ctrl) +static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, + blk_status_t status) { + struct nvme_ctrl *ctrl = rq->end_io_data; unsigned long rtt = jiffies - (rq->deadline - rq->timeout); unsigned long delay = nvme_keep_alive_work_period(ctrl); enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); @@ -1197,17 +1198,20 @@ static void nvme_keep_alive_finish(struct request *rq, delay = 0; } + blk_mq_free_request(rq); + if (status) { dev_err(ctrl->device, "failed nvme_keep_alive_end_io error=%d\n", status); - return; + return RQ_END_IO_NONE; } ctrl->ka_last_check_time = jiffies; 
ctrl->comp_seen = false; if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING) queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); + return RQ_END_IO_NONE; } static void nvme_keep_alive_work(struct work_struct *work) @@ -1216,7 +1220,6 @@ static void nvme_keep_alive_work(struct work_struct *work) struct nvme_ctrl, ka_work); bool comp_seen = ctrl->comp_seen; struct request *rq; - blk_status_t status; ctrl->ka_last_check_time = jiffies; @@ -1239,9 +1242,9 @@ static void nvme_keep_alive_work(struct work_struct *work) nvme_init_request(rq, &ctrl->ka_cmd); rq->timeout = ctrl->kato * HZ; - status = blk_execute_rq(rq, false); - nvme_keep_alive_finish(rq, status, ctrl); - blk_mq_free_request(rq); + rq->end_io = nvme_keep_alive_end_io; + rq->end_io_data = ctrl; + blk_execute_rq_nowait(rq, false); } static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) From 396442e438851e176c6f4ad54e3075e45daeccd5 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 23 Oct 2024 15:41:46 +0200 Subject: [PATCH 063/216] net/mlx5: unique names for per device caches [ Upstream commit 25872a079bbbe952eb660249cc9f40fa75623e68 ] Add the device name to the per device kmem_cache names to ensure their uniqueness. This fixes warnings like this: "kmem_cache of name 'mlx5_fs_fgs' already exists". 
Signed-off-by: Sebastian Ott Reviewed-by: Breno Leitao Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20241023134146.28448-1-sebott@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit d6616dcd87216e444dda10de1c84e5ecb8c510b1) --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 991250f44c2ed..474e63d02ba49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3478,6 +3478,7 @@ void mlx5_fs_core_free(struct mlx5_core_dev *dev) int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering; + char name[80]; int err = 0; err = mlx5_init_fc_stats(dev); @@ -3502,10 +3503,12 @@ int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) else steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; - steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + snprintf(name, sizeof(name), "%s-mlx5_fs_fgs", dev_name(dev->device)); + steering->fgs_cache = kmem_cache_create(name, sizeof(struct mlx5_flow_group), 0, 0, NULL); - steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + snprintf(name, sizeof(name), "%s-mlx5_fs_ftes", dev_name(dev->device)); + steering->ftes_cache = kmem_cache_create(name, sizeof(struct fs_fte), 0, 0, NULL); if (!steering->ftes_cache || !steering->fgs_cache) { err = -ENOMEM; From 4f3c0a5c86e26f06bbf49405dd898cccee9c1716 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Tue, 19 Nov 2024 05:44:29 +0000 Subject: [PATCH 064/216] softirq: Allow raising SCHED_SOFTIRQ from SMP-call-function on RT kernel [ Upstream commit 6675ce20046d149e1e1ffe7e9577947dee17aad5 ] do_softirq_post_smp_call_flush() on PREEMPT_RT kernels carries a WARN_ON_ONCE() for any SOFTIRQ being raised from an SMP-call-function. 
Since do_softirq_post_smp_call_flush() is called with preempt disabled, raising a SOFTIRQ during flush_smp_call_function_queue() can lead to longer preempt disabled sections. Since commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") IPIs to an idle CPU in TIF_POLLING_NRFLAG mode can be optimized out by instead setting TIF_NEED_RESCHED bit in idle task's thread_info and relying on the flush_smp_call_function_queue() in the idle-exit path to run the SMP-call-function. To trigger an idle load balancing, the scheduler queues nohz_csd_function() responsible for triggering an idle load balancing on a target nohz idle CPU and sends an IPI. Only now, this IPI is optimized out and the SMP-call-function is executed from flush_smp_call_function_queue() in do_idle() which can raise a SCHED_SOFTIRQ to trigger the balancing. So far, this went undetected since the need_resched() check in nohz_csd_function() would make it bail out of idle load balancing early as the idle thread does not clear TIF_POLLING_NRFLAG before calling flush_smp_call_function_queue(). The need_resched() check was added with the intent to catch a new task wakeup; however, it has recently been discovered to be unnecessary and will be removed in the subsequent commit after which nohz_csd_function() can raise a SCHED_SOFTIRQ from flush_smp_call_function_queue() to trigger an idle load balance on an idle target in TIF_POLLING_NRFLAG mode. nohz_csd_function() bails out early if the "idle_cpu()" check for the target CPU fails, and does not lock the target CPU's rq until the very end, once it has found tasks to run on the CPU and will not inhibit the wakeup of, or running of a newly woken up higher priority task. Account for this and prevent a WARN_ON_ONCE() when SCHED_SOFTIRQ is raised from flush_smp_call_function_queue(). 
Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241119054432.6405-2-kprateek.nayak@amd.com Signed-off-by: Sasha Levin (cherry picked from commit 3dd65ffa2df654bea474ee797c6e9de2170177bf) --- kernel/softirq.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index bd9716d7bb638..f24d80cf20bd3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -279,17 +279,24 @@ static inline void invoke_softirq(void) wakeup_softirqd(); } +#define SCHED_SOFTIRQ_MASK BIT(SCHED_SOFTIRQ) + /* * flush_smp_call_function_queue() can raise a soft interrupt in a function - * call. On RT kernels this is undesired and the only known functionality - * in the block layer which does this is disabled on RT. If soft interrupts - * get raised which haven't been raised before the flush, warn so it can be + * call. On RT kernels this is undesired and the only known functionalities + * are in the block layer which is disabled on RT, and in the scheduler for + * idle load balancing. If soft interrupts get raised which haven't been + * raised before the flush, warn if it is not a SCHED_SOFTIRQ so it can be * investigated. */ void do_softirq_post_smp_call_flush(unsigned int was_pending) { - if (WARN_ON_ONCE(was_pending != local_softirq_pending())) + unsigned int is_pending = local_softirq_pending(); + + if (unlikely(was_pending != is_pending)) { + WARN_ON_ONCE(was_pending != (is_pending & ~SCHED_SOFTIRQ_MASK)); invoke_softirq(); + } } #else /* CONFIG_PREEMPT_RT */ From ddf9d03525f96b37819f66d577077b9a3b78208d Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Sun, 8 Dec 2024 14:50:01 +0500 Subject: [PATCH 065/216] net: renesas: rswitch: fix possible early skb release [ Upstream commit 5cb099902b6b6292b3a85ffa1bb844e0ba195945 ] When sending frame split into multiple descriptors, hardware processes descriptors one by one, including writing back DT values. 
The first descriptor could be already marked as completed when processing of next descriptors for the same frame is still in progress. Although only the last descriptor is configured to generate interrupt, completion of the first descriptor could be noticed by the driver when handling interrupt for the previous frame. Currently, driver stores skb in the entry that corresponds to the first descriptor. This results into skb could be unmapped and freed when hardware did not complete the send yet. This opens a window for corrupting the data being sent. Fix this by saving skb in the entry that corresponds to the last descriptor used to send the frame. Fixes: d2c96b9d5f83 ("net: rswitch: Add jumbo frames handling for TX") Signed-off-by: Nikita Yushchenko Reviewed-by: Yoshihiro Shimoda Link: https://patch.msgid.link/20241208095004.69468-2-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 43e3aa2f44d60421c3e0d14bf92c72a195015486) --- drivers/net/ethernet/renesas/rswitch.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 54aa56c841334..2f483531d95cb 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1632,8 +1632,9 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd if (dma_mapping_error(ndev->dev.parent, dma_addr_orig)) goto err_kfree; - gq->skbs[gq->cur] = skb; - gq->unmap_addrs[gq->cur] = dma_addr_orig; + /* Stored the skb at the last descriptor to avoid skb free before hardware completes send */ + gq->skbs[(gq->cur + nr_desc - 1) % gq->ring_size] = skb; + gq->unmap_addrs[(gq->cur + nr_desc - 1) % gq->ring_size] = dma_addr_orig; dma_wmb(); From b2d42d500719bad59e4cfb9f251afe6f30bd0067 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 29 Feb 2024 16:14:36 +0200 Subject: [PATCH 066/216] xhci: retry Stop Endpoint on 
buggy NEC controllers [ Upstream commit fd9d55d190c0e5fefd3a9165ea361809427885a1 ] Two NEC uPD720200 adapters have been observed to randomly misbehave: a Stop Endpoint command fails with Context Error, the Output Context indicates Stopped state, and the endpoint keeps running. Very often, Set TR Dequeue Pointer is seen to fail next with Context Error too, in addition to problems from unexpectedly completed cancelled work. The pathology is common on fast running isoc endpoints like uvcvideo, but has also been reproduced on a full-speed bulk endpoint of pl2303. It seems all EPs are affected, with risk proportional to their load. Reproduction involves receiving any kind of stream and closing it to make the device driver cancel URBs already queued in advance. Deal with it by retrying the command like in the Running state. Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240229141438.619372-8-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: e21ebe51af68 ("xhci: Turn NEC specific quirk for handling Stop Endpoint errors generic") Signed-off-by: Sasha Levin (cherry picked from commit 61329b25dc1dc93a35167189e0295166003edb29) --- drivers/usb/host/xhci-ring.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 4d387eb5409b5..8e16520a960dd 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1180,6 +1180,15 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, break; ep->ep_state &= ~EP_STOP_CMD_PENDING; return; + case EP_STATE_STOPPED: + /* + * NEC uPD720200 sometimes sets this state and fails with + * Context Error while continuing to process TRBs. + * Be conservative and trust EP_CTX_STATE on other chips. 
+ */ + if (!(xhci->quirks & XHCI_NEC_HOST)) + break; + fallthrough; case EP_STATE_RUNNING: /* Race, HW handled stop ep cmd before ep was running */ xhci_dbg(xhci, "Stop ep completion ctx error, ep is running\n"); From 40b86e2c07f1ff425c928ea94f6a5ab93801427a Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Wed, 6 Nov 2024 12:14:57 +0200 Subject: [PATCH 067/216] usb: xhci: Limit Stop Endpoint retries [ Upstream commit 42b7581376015c1bbcbe5831f043cd0ac119d028 ] Some host controllers fail to atomically transition an endpoint to the Running state on a doorbell ring and enter a hidden "Restarting" state, which looks very much like Stopped, with the important difference that it will spontaneously transition to Running anytime soon. A Stop Endpoint command queued in the Restarting state typically fails with Context State Error and the completion handler sees the Endpoint Context State as either still Stopped or already Running. Even a case of Halted was observed, when an error occurred right after the restart. The Halted state is already recovered from by resetting the endpoint. The Running state is handled by retrying Stop Endpoint. The Stopped state was recognized as a problem on NEC controllers and worked around also by retrying, because the endpoint soon restarts and then stops for good. But there is a risk: the command may fail if the endpoint is "stopped for good" already, and retries will fail forever. The possibility of this was not realized at the time, but a number of cases were discovered later and reproduced. Some proved difficult to deal with, and it is outright impossible to predict if an endpoint may fail to ever start at all due to a hardware bug. One such bug (albeit on ASM3142, not on NEC) was found to be reliably triggered simply by toggling an AX88179 NIC up/down in a tight loop for a few seconds. An endless retries storm is quite nasty. 
Besides putting needless load on the xHC and CPU, it causes URBs never to be given back, paralyzing the device and connection/disconnection logic for the whole bus if the device is unplugged. User processes waiting for URBs become unkillable, drivers and kworker threads lock up and xhci_hcd cannot be reloaded. For peace of mind, impose a timeout on Stop Endpoint retries in this case. If they don't succeed in 100ms, consider the endpoint stopped permanently for some reason and just give back the unlinked URBs. This failure case is rare already and work is under way to make it rarer. Start this work today by also handling one simple case of race with Reset Endpoint, because it costs just two lines to implement. Fixes: fd9d55d190c0 ("xhci: retry Stop Endpoint on buggy NEC controllers") CC: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20241106101459.775897-32-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: e21ebe51af68 ("xhci: Turn NEC specific quirk for handling Stop Endpoint errors generic") Signed-off-by: Sasha Levin (cherry picked from commit f1ece345ad2c5820d10eb29f07bebc378295d5be) --- drivers/usb/host/xhci-ring.c | 28 ++++++++++++++++++++++++---- drivers/usb/host/xhci.c | 2 ++ drivers/usb/host/xhci.h | 1 + 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 8e16520a960dd..f5cad2e8413b3 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -52,6 +52,7 @@ * endpoint rings; it generates events on the event ring for these. */ +#include #include #include #include @@ -1182,16 +1183,35 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, return; case EP_STATE_STOPPED: /* - * NEC uPD720200 sometimes sets this state and fails with - * Context Error while continuing to process TRBs. - * Be conservative and trust EP_CTX_STATE on other chips. 
+ * Per xHCI 4.6.9, Stop Endpoint command on a Stopped + * EP is a Context State Error, and EP stays Stopped. + * + * But maybe it failed on Halted, and somebody ran Reset + * Endpoint later. EP state is now Stopped and EP_HALTED + * still set because Reset EP handler will run after us. + */ + if (ep->ep_state & EP_HALTED) + break; + /* + * On some HCs EP state remains Stopped for some tens of + * us to a few ms or more after a doorbell ring, and any + * new Stop Endpoint fails without aborting the restart. + * This handler may run quickly enough to still see this + * Stopped state, but it will soon change to Running. + * + * Assume this bug on unexpected Stop Endpoint failures. + * Keep retrying until the EP starts and stops again, on + * chips where this is known to help. Wait for 100ms. */ if (!(xhci->quirks & XHCI_NEC_HOST)) break; + if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100))) + break; fallthrough; case EP_STATE_RUNNING: /* Race, HW handled stop ep cmd before ep was running */ - xhci_dbg(xhci, "Stop ep completion ctx error, ep is running\n"); + xhci_dbg(xhci, "Stop ep completion ctx error, ctx_state %d\n", + GET_EP_CTX_STATE(ep_ctx)); command = xhci_alloc_command(xhci, false, GFP_ATOMIC); if (!command) { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index dee9965e2f045..0e91e7a9e1f00 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -8,6 +8,7 @@ * Some code borrowed from the Linux EHCI driver. 
*/ +#include #include #include #include @@ -1746,6 +1747,7 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) ret = -ENOMEM; goto done; } + ep->stop_time = jiffies; ep->ep_state |= EP_STOP_CMD_PENDING; xhci_queue_stop_endpoint(xhci, command, urb->dev->slot_id, ep_index, 0); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index eefcfd40848fc..4b342a6eace27 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -718,6 +718,7 @@ struct xhci_virt_ep { /* Bandwidth checking storage */ struct xhci_bw_info bw_info; struct list_head bw_endpoint_list; + unsigned long stop_time; /* Isoch Frame ID checking storage */ int next_frame_id; /* Use new Isoch TRB layout needed for extended TBC support */ From 1d1cebc9f0241bb02c5ab8c1450056ff22bc331d Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 17 Dec 2024 12:21:21 +0200 Subject: [PATCH 068/216] xhci: Turn NEC specific quirk for handling Stop Endpoint errors generic [ Upstream commit e21ebe51af688eb98fd6269240212a3c7300deea ] xHC hosts from several vendors have the same issue where endpoints start so slowly that a later queued 'Stop Endpoint' command may complete before endpoint is up and running. The 'Stop Endpoint' command fails with context state error as the endpoint still appears as stopped. 
See commit 42b758137601 ("usb: xhci: Limit Stop Endpoint retries") for details CC: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20241217102122.2316814-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (cherry picked from commit 6cd8e621a689b0b3f25b384702c2574f06ad1edb) --- drivers/usb/host/xhci-ring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index f5cad2e8413b3..ee07a1cc4ba46 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1203,8 +1203,6 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, * Keep retrying until the EP starts and stops again, on * chips where this is known to help. Wait for 100ms. */ - if (!(xhci->quirks & XHCI_NEC_HOST)) - break; if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100))) break; fallthrough; From c5a2b54b28eeb37cafd39224212c73482c3a327a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 20 May 2022 13:47:11 +0300 Subject: [PATCH 069/216] thunderbolt: Add support for Intel Lunar Lake [ Upstream commit 2cd3da4e37453019e21a486d9de3144f46b4fdf7 ] Intel Lunar Lake has similar integrated Thunderbolt/USB4 controller as Intel Meteor Lake with some small differences in the host router (it has 3 DP IN adapters for instance). Add the Intel Lunar Lake PCI IDs to the driver list of supported devices. 
Tested-by: Pengfei Xu Signed-off-by: Mika Westerberg Stable-dep-of: 8644b48714dc ("thunderbolt: Add support for Intel Panther Lake-M/P") Signed-off-by: Sasha Levin (cherry picked from commit 888c554d3dfd22d0be5da7894db60550ef504d59) --- drivers/thunderbolt/nhi.c | 4 ++++ drivers/thunderbolt/nhi.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 1ec6f9c82aef0..b22023fae60de 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -1524,6 +1524,10 @@ static struct pci_device_id nhi_ids[] = { .driver_data = (kernel_ulong_t)&icl_nhi_ops }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL_P_NHI1), .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_LNL_NHI0), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_LNL_NHI1), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_80G_NHI) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_40G_NHI) }, diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 0f029ce758825..7a07c7c1a9c2c 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -90,6 +90,8 @@ extern const struct tb_nhi_ops icl_nhi_ops; #define PCI_DEVICE_ID_INTEL_TGL_H_NHI1 0x9a21 #define PCI_DEVICE_ID_INTEL_RPL_NHI0 0xa73e #define PCI_DEVICE_ID_INTEL_RPL_NHI1 0xa76d +#define PCI_DEVICE_ID_INTEL_LNL_NHI0 0xa833 +#define PCI_DEVICE_ID_INTEL_LNL_NHI1 0xa834 #define PCI_CLASS_SERIAL_USB_USB4 0x0c0340 From 370a10f1c3d606d3fba1ec99b8b8a0dd6da9d03b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 14 May 2024 10:15:14 +0300 Subject: [PATCH 070/216] thunderbolt: Add support for Intel Panther Lake-M/P [ Upstream commit 8644b48714dca8bf2f42a4ff8311de8efc9bd8c3 ] Intel Panther Lake-M/P has the same integrated Thunderbolt/USB4 controller as Lunar Lake. Add these PCI IDs to the driver list of supported devices. 
Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin (cherry picked from commit 5a23e3e9e245f6ee3fb9411b94fb0f2341987328) --- drivers/thunderbolt/nhi.c | 8 ++++++++ drivers/thunderbolt/nhi.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index b22023fae60de..79f2bf5df19a6 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -1528,6 +1528,14 @@ static struct pci_device_id nhi_ids[] = { .driver_data = (kernel_ulong_t)&icl_nhi_ops }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_LNL_NHI1), .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_M_NHI0), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_M_NHI1), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_P_NHI0), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_P_NHI1), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_80G_NHI) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_40G_NHI) }, diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 7a07c7c1a9c2c..16744f25a9a06 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -92,6 +92,10 @@ extern const struct tb_nhi_ops icl_nhi_ops; #define PCI_DEVICE_ID_INTEL_RPL_NHI1 0xa76d #define PCI_DEVICE_ID_INTEL_LNL_NHI0 0xa833 #define PCI_DEVICE_ID_INTEL_LNL_NHI1 0xa834 +#define PCI_DEVICE_ID_INTEL_PTL_M_NHI0 0xe333 +#define PCI_DEVICE_ID_INTEL_PTL_M_NHI1 0xe334 +#define PCI_DEVICE_ID_INTEL_PTL_P_NHI0 0xe433 +#define PCI_DEVICE_ID_INTEL_PTL_P_NHI1 0xe434 #define PCI_CLASS_SERIAL_USB_USB4 0x0c0340 From baabc30d062789617a19f7ee5cc7fa04c0feb550 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 10:25:51 -0600 Subject: [PATCH 071/216] thunderbolt: Don't display 
nvm_version unless upgrade supported [ Upstream commit e34f1717ef0632fcec5cb827e5e0e9f223d70c9b ] The read will never succeed if NVM wasn't initialized due to an unknown format. Add a new callback for visibility to only show when supported. Cc: stable@vger.kernel.org Fixes: aef9c693e7e5 ("thunderbolt: Move vendor specific NVM handling into nvm.c") Reported-by: Richard Hughes Closes: https://github.com/fwupd/fwupd/issues/8200 Signed-off-by: Mario Limonciello Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin (cherry picked from commit 5422f4321640bb90fbdcd09c2486ee1da06dd443) --- drivers/thunderbolt/retimer.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 47becb363adac..2ee8c5ebca7c3 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -98,6 +98,7 @@ static int tb_retimer_nvm_add(struct tb_retimer *rt) err_nvm: dev_dbg(&rt->dev, "NVM upgrade disabled\n"); + rt->no_nvm_upgrade = true; if (!IS_ERR(nvm)) tb_nvm_free(nvm); @@ -177,8 +178,6 @@ static ssize_t nvm_authenticate_show(struct device *dev, if (!rt->nvm) ret = -EAGAIN; - else if (rt->no_nvm_upgrade) - ret = -EOPNOTSUPP; else ret = sysfs_emit(buf, "%#x\n", rt->auth_status); @@ -331,6 +330,19 @@ static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(vendor); +static umode_t retimer_is_visible(struct kobject *kobj, struct attribute *attr, + int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct tb_retimer *rt = tb_to_retimer(dev); + + if (attr == &dev_attr_nvm_authenticate.attr || + attr == &dev_attr_nvm_version.attr) + return rt->no_nvm_upgrade ? 
0 : attr->mode; + + return attr->mode; +} + static struct attribute *retimer_attrs[] = { &dev_attr_device.attr, &dev_attr_nvm_authenticate.attr, @@ -340,6 +352,7 @@ static struct attribute *retimer_attrs[] = { }; static const struct attribute_group retimer_group = { + .is_visible = retimer_is_visible, .attrs = retimer_attrs, }; From bc49cd0d09dded4b9e1707858256d52f69c2cca0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 24 Jan 2024 13:12:46 +0800 Subject: [PATCH 072/216] x86, crash: wrap crash dumping code into crash related ifdefs [ Upstream commit a4eeb2176d89fdf2785851521577b94b31690a60 ] Now that the crash code under the kernel/ folder has been split out from the kexec code, crash dumping can be separated from kexec reboot in config items on x86 with some adjustments. Here, also change some ifdefs or IS_ENABLED() checks to more appropriate ones, e.g. - #ifdef CONFIG_KEXEC_CORE -> #ifdef CONFIG_CRASH_DUMP - (!IS_ENABLED(CONFIG_KEXEC_CORE)) -> (!IS_ENABLED(CONFIG_CRASH_RESERVE)) [bhe@redhat.com: don't nest CONFIG_CRASH_DUMP ifdef inside CONFIG_KEXEC_CORE ifdef scope] Link: https://lore.kernel.org/all/SN6PR02MB4157931105FA68D72E3D3DB8D47B2@SN6PR02MB4157.namprd02.prod.outlook.com/T/#u Link: https://lkml.kernel.org/r/20240124051254.67105-7-bhe@redhat.com Signed-off-by: Baoquan He Cc: Al Viro Cc: Eric W.
Biederman Cc: Hari Bathini Cc: Pingfan Liu Cc: Klara Modin Cc: Michael Kelley Cc: Nathan Chancellor Cc: Stephen Rothwell Cc: Yang Li Signed-off-by: Andrew Morton Stable-dep-of: bcc80dec91ee ("x86/hyperv: Fix hv tsc page based sched_clock for hibernation") Signed-off-by: Sasha Levin (cherry picked from commit e5b1574a8ca28c40cf53eda43f6c3b016ed41e27) --- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/cpu/mshyperv.c | 10 ++++++++-- arch/x86/kernel/kexec-bzimage64.c | 4 ++++ arch/x86/kernel/kvm.c | 4 ++-- arch/x86/kernel/machine_kexec_64.c | 3 +++ arch/x86/kernel/reboot.c | 4 ++-- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/xen/enlighten_hvm.c | 4 ++++ arch/x86/xen/mmu_pv.c | 2 +- 10 files changed, 28 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 70ef205fc1601..84cb0c72b38e8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -99,9 +99,9 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o -obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o +obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o -obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o +obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o crash.o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_X86_32) += doublefault_32.o diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index bcb2d640a0cd8..93e1cb4f7ff19 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -209,7 +209,9 @@ static void hv_machine_shutdown(void) if (kexec_in_progress) hyperv_cleanup(); } +#endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_CRASH_DUMP static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) @@ -221,7 +223,7 @@ static void hv_machine_crash_shutdown(struct pt_regs 
*regs) /* Disable the hypercall page when there is only 1 active CPU. */ hyperv_cleanup(); } -#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_CRASH_DUMP */ #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -493,9 +495,13 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif -#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE) +#if IS_ENABLED(CONFIG_HYPERV) +#if defined(CONFIG_KEXEC_CORE) machine_ops.shutdown = hv_machine_shutdown; +#endif +#if defined(CONFIG_CRASH_DUMP) machine_ops.crash_shutdown = hv_machine_crash_shutdown; +#endif #endif if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { /* diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index a61c12c012709..0de509c02d18b 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -263,11 +263,13 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, memset(&params->hd0_info, 0, sizeof(params->hd0_info)); memset(&params->hd1_info, 0, sizeof(params->hd1_info)); +#ifdef CONFIG_CRASH_DUMP if (image->type == KEXEC_TYPE_CRASH) { ret = crash_setup_memmap_entries(image, params); if (ret) return ret; } else +#endif setup_e820_entries(params); nr_e820_entries = params->e820_entries; @@ -428,12 +430,14 @@ static void *bzImage64_load(struct kimage *image, char *kernel, return ERR_PTR(-EINVAL); } +#ifdef CONFIG_CRASH_DUMP /* Allocate and load backup region */ if (image->type == KEXEC_TYPE_CRASH) { ret = crash_load_segments(image); if (ret) return ERR_PTR(ret); } +#endif /* * Load purgatory. For 64bit entry point, purgatory code can be diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b8ab9ee5896c1..38d88c8b56ec0 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -769,7 +769,7 @@ static struct notifier_block kvm_pv_reboot_nb = { * won't be valid.
In cases like kexec, in which you install a new kernel, this * means a random memory location will be kept being written. */ -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP static void kvm_crash_shutdown(struct pt_regs *regs) { kvm_guest_cpu_offline(true); @@ -852,7 +852,7 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP machine_ops.crash_shutdown = kvm_crash_shutdown; #endif diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 2fa12d1dc6760..aaeac2deb85dc 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -545,6 +545,8 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif /* CONFIG_KEXEC_FILE */ +#ifdef CONFIG_CRASH_DUMP + static int kexec_mark_range(unsigned long start, unsigned long end, bool protect) { @@ -589,6 +591,7 @@ void arch_kexec_unprotect_crashkres(void) { kexec_mark_crashkres(false); } +#endif /* * During a traditional boot under SME, SME will encrypt the kernel, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 830425e6d38e2..f3130f762784a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -796,7 +796,7 @@ struct machine_ops machine_ops __ro_after_init = { .emergency_restart = native_machine_emergency_restart, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -826,7 +826,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 44148dcfcec2c..b66f909bf1cd1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -548,7 +548,7 @@ static void __init reserve_crashkernel(void) bool high = false; int ret; 
- if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; total_mem = memblock_phys_mem_size(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 96a771f9f930a..52c3823b72119 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -282,7 +282,7 @@ struct smp_ops smp_ops = { .smp_cpus_done = native_smp_cpus_done, .stop_other_cpus = native_stop_other_cpus, -#if defined(CONFIG_KEXEC_CORE) +#if defined(CONFIG_CRASH_DUMP) .crash_stop_other_cpus = kdump_nmi_shootdown_cpus, #endif .smp_send_reschedule = native_smp_send_reschedule, diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 70be57e8f51ca..ade22feee7aeb 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -141,7 +141,9 @@ static void xen_hvm_shutdown(void) if (kexec_in_progress) xen_reboot(SHUTDOWN_soft_reset); } +#endif +#ifdef CONFIG_CRASH_DUMP static void xen_hvm_crash_shutdown(struct pt_regs *regs) { native_machine_crash_shutdown(regs); @@ -229,6 +231,8 @@ static void __init xen_hvm_guest_init(void) #ifdef CONFIG_KEXEC_CORE machine_ops.shutdown = xen_hvm_shutdown; +#endif +#ifdef CONFIG_CRASH_DUMP machine_ops.crash_shutdown = xen_hvm_crash_shutdown; #endif } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 6b201e64d8abc..bfd57d07f4b5e 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2517,7 +2517,7 @@ int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL_GPL(xen_remap_pfn); -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_VMCORE_INFO phys_addr_t paddr_vmcoreinfo_note(void) { if (xen_pv_domain()) From 612d0007d4b6e4c25ef01d38f45384a25256094a Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Tue, 17 Sep 2024 11:09:17 +0530 Subject: [PATCH 073/216] x86/hyperv: Fix hv tsc page based sched_clock for hibernation [ Upstream commit bcc80dec91ee745b3d66f3e48f0ec2efdea97149 ] read_hv_sched_clock_tsc() assumes that the Hyper-V clock counter is bigger than the 
variable hv_sched_clock_offset, which is cached during early boot, but depending on the timing this assumption may be false when a hibernated VM starts again (the clock counter starts from 0 again) and is resuming back (Note: hv_init_tsc_clocksource() is not called during hibernation/resume); consequently, read_hv_sched_clock_tsc() may return a negative integer (which is interpreted as a huge positive integer since the return type is u64) and new kernel messages are prefixed with huge timestamps before read_hv_sched_clock_tsc() grows big enough (which typically takes several seconds). Fix the issue by saving the Hyper-V clock counter just before the suspend, and using it to correct the hv_sched_clock_offset in resume. This makes hv tsc page based sched_clock continuous and ensures that post resume, it starts from where it left off during suspend. Override x86_platform.save_sched_clock_state and x86_platform.restore_sched_clock_state routines to correct this as soon as possible. Note: if Invariant TSC is available, the issue doesn't happen because 1) we don't register read_hv_sched_clock_tsc() for sched clock: See commit e5313f1c5404 ("clocksource/drivers/hyper-v: Rework clocksource and sched clock setup"); 2) the common x86 code adjusts TSC similarly: see __restore_processor_state() -> tsc_verify_tsc_adjust(true) and x86_platform.restore_sched_clock_state(). 
Cc: stable@vger.kernel.org Fixes: 1349401ff1aa ("clocksource/drivers/hyper-v: Suspend/resume Hyper-V clocksource for hibernation") Co-developed-by: Dexuan Cui Signed-off-by: Dexuan Cui Signed-off-by: Naman Jain Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240917053917.76787-1-namjain@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20240917053917.76787-1-namjain@linux.microsoft.com> Signed-off-by: Sasha Levin (cherry picked from commit 6681113633dc738ec95fe33104843a1e25acef3b) --- arch/x86/kernel/cpu/mshyperv.c | 58 ++++++++++++++++++++++++++++++ drivers/clocksource/hyperv_timer.c | 14 +++++++- include/clocksource/hyperv_timer.h | 2 ++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 93e1cb4f7ff19..6328cf56e59be 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -224,6 +224,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hyperv_cleanup(); } #endif /* CONFIG_CRASH_DUMP */ + +static u64 hv_ref_counter_at_suspend; +static void (*old_save_sched_clock_state)(void); +static void (*old_restore_sched_clock_state)(void); + +/* + * Hyper-V clock counter resets during hibernation. Save and restore clock + * offset during suspend/resume, while also considering the time passed + * before suspend. This is to make sure that sched_clock using hv tsc page + * based clocksource, proceeds from where it left off during suspend and + * it shows correct time for the timestamps of kernel messages after resume. + */ +static void save_hv_clock_tsc_state(void) +{ + hv_ref_counter_at_suspend = hv_read_reference_counter(); +} + +static void restore_hv_clock_tsc_state(void) +{ + /* + * Adjust the offsets used by hv tsc clocksource to + * account for the time spent before hibernation. + * adjusted value = reference counter (time) at suspend + * - reference counter (time) now. 
+ */ + hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); +} + +/* + * Functions to override save_sched_clock_state and restore_sched_clock_state + * functions of x86_platform. The Hyper-V clock counter is reset during + * suspend-resume and the offset used to measure time needs to be + * corrected, post resume. + */ +static void hv_save_sched_clock_state(void) +{ + old_save_sched_clock_state(); + save_hv_clock_tsc_state(); +} + +static void hv_restore_sched_clock_state(void) +{ + restore_hv_clock_tsc_state(); + old_restore_sched_clock_state(); +} + +static void __init x86_setup_ops_for_tsc_pg_clock(void) +{ + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return; + + old_save_sched_clock_state = x86_platform.save_sched_clock_state; + x86_platform.save_sched_clock_state = hv_save_sched_clock_state; + + old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; + x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; +} #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -578,6 +635,7 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); + x86_setup_ops_for_tsc_pg_clock(); hv_vtl_init_platform(); #endif /* diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 8ff7cd4e20bb1..5eec1457e1396 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -27,7 +27,8 @@ #include static struct clock_event_device __percpu *hv_clock_event; -static u64 hv_sched_clock_offset __ro_after_init; +/* Note: offset can hold negative values after hibernation. 
*/ +static u64 hv_sched_clock_offset __read_mostly; /* * If false, we're using the old mechanism for stimer0 interrupts @@ -456,6 +457,17 @@ static void resume_hv_clock_tsc(struct clocksource *arg) hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); } +/* + * Called during resume from hibernation, from overridden + * x86_platform.restore_sched_clock_state routine. This is to adjust offsets + * used to calculate time for hv tsc page based sched_clock, to account for + * time spent before hibernation. + */ +void hv_adj_sched_clock_offset(u64 offset) +{ + hv_sched_clock_offset -= offset; +} + #ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK static int hv_cs_enable(struct clocksource *cs) { diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index 6cdc873ac907f..aa5233b1eba97 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -38,6 +38,8 @@ extern void hv_remap_tsc_clocksource(void); extern unsigned long hv_get_tsc_pfn(void); extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); +extern void hv_adj_sched_clock_offset(u64 offset); + static __always_inline bool hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc, u64 *time) From e7cd6a73860fce7a6e9452237bb25c6c379b66b0 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 17 Oct 2023 13:02:17 +0200 Subject: [PATCH 074/216] of: address: Remove duplicated functions [ Upstream commit 3eb030c60835668997d5763b1a0c7938faf169f6 ] The recently added of_bus_default_flags_translate() performs the exact same operation as of_bus_pci_translate() and of_bus_isa_translate(). Avoid duplicated code replacing both of_bus_pci_translate() and of_bus_isa_translate() with of_bus_default_flags_translate(). 
Signed-off-by: Herve Codina Link: https://lore.kernel.org/r/20231017110221.189299-3-herve.codina@bootlin.com Signed-off-by: Rob Herring Stable-dep-of: 7f05e20b989a ("of: address: Preserve the flags portion on 1:1 dma-ranges mapping") Signed-off-by: Sasha Levin (cherry picked from commit 7a40a884f597750d4ed6915edfaed5bc7f10e084) --- drivers/of/address.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index dfd05cb2b2fcf..cfe5a11b620aa 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -217,10 +217,6 @@ static u64 of_bus_pci_map(__be32 *addr, const __be32 *range, int na, int ns, return da - cp; } -static int of_bus_pci_translate(__be32 *addr, u64 offset, int na) -{ - return of_bus_default_translate(addr + 1, offset, na - 1); -} #endif /* CONFIG_PCI */ /* @@ -344,11 +340,6 @@ static u64 of_bus_isa_map(__be32 *addr, const __be32 *range, int na, int ns, return da - cp; } -static int of_bus_isa_translate(__be32 *addr, u64 offset, int na) -{ - return of_bus_default_translate(addr + 1, offset, na - 1); -} - static unsigned int of_bus_isa_get_flags(const __be32 *addr) { unsigned int flags = 0; @@ -379,7 +370,7 @@ static struct of_bus of_busses[] = { .match = of_bus_pci_match, .count_cells = of_bus_pci_count_cells, .map = of_bus_pci_map, - .translate = of_bus_pci_translate, + .translate = of_bus_default_flags_translate, .has_flags = true, .get_flags = of_bus_pci_get_flags, }, @@ -391,7 +382,7 @@ static struct of_bus of_busses[] = { .match = of_bus_isa_match, .count_cells = of_bus_isa_count_cells, .map = of_bus_isa_map, - .translate = of_bus_isa_translate, + .translate = of_bus_default_flags_translate, .has_flags = true, .get_flags = of_bus_isa_get_flags, }, From 06822db90c46eb677a7ca7955b9af7a517b001fe Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 26 Oct 2023 08:53:58 -0500 Subject: [PATCH 075/216] of: address: Store number of bus flag cells rather than bool [ Upstream commit 
88696db08b7efa3b6bb722014ea7429e78f6be32 ] It is more useful to know how many flags cells a bus has rather than whether a bus has flags or not as ultimately the number of cells is the information used. Replace 'has_flags' boolean with 'flag_cells' count. Acked-by: Herve Codina Link: https://lore.kernel.org/r/20231026135358.3564307-2-robh@kernel.org Signed-off-by: Rob Herring Stable-dep-of: 7f05e20b989a ("of: address: Preserve the flags portion on 1:1 dma-ranges mapping") Signed-off-by: Sasha Levin (cherry picked from commit 443f803b332b9f78a843c08453b33981dcde9af8) --- drivers/of/address.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index cfe5a11b620aa..cdefe5a89e5d2 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -46,7 +46,7 @@ struct of_bus { u64 (*map)(__be32 *addr, const __be32 *range, int na, int ns, int pna); int (*translate)(__be32 *addr, u64 offset, int na); - bool has_flags; + int flag_cells; unsigned int (*get_flags)(const __be32 *addr); }; @@ -371,7 +371,7 @@ static struct of_bus of_busses[] = { .count_cells = of_bus_pci_count_cells, .map = of_bus_pci_map, .translate = of_bus_default_flags_translate, - .has_flags = true, + .flag_cells = 1, .get_flags = of_bus_pci_get_flags, }, #endif /* CONFIG_PCI */ @@ -383,7 +383,7 @@ static struct of_bus of_busses[] = { .count_cells = of_bus_isa_count_cells, .map = of_bus_isa_map, .translate = of_bus_default_flags_translate, - .has_flags = true, + .flag_cells = 1, .get_flags = of_bus_isa_get_flags, }, /* Default with flags cell */ @@ -394,7 +394,7 @@ static struct of_bus of_busses[] = { .count_cells = of_bus_default_count_cells, .map = of_bus_default_flags_map, .translate = of_bus_default_flags_translate, - .has_flags = true, + .flag_cells = 1, .get_flags = of_bus_default_flags_get_flags, }, /* Default */ @@ -827,7 +827,7 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, int na = 
parser->na; int ns = parser->ns; int np = parser->pna + na + ns; - int busflag_na = 0; + int busflag_na = parser->bus->flag_cells; if (!range) return NULL; @@ -837,10 +837,6 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, range->flags = parser->bus->get_flags(parser->range); - /* A extra cell for resource flags */ - if (parser->bus->has_flags) - busflag_na = 1; - range->bus_addr = of_read_number(parser->range + busflag_na, na - busflag_na); if (parser->dma) From e1499b002e67a74942e7d24f65296f1f6a57d9de Mon Sep 17 00:00:00 2001 From: Andrea della Porta Date: Sun, 24 Nov 2024 11:05:37 +0100 Subject: [PATCH 076/216] of: address: Preserve the flags portion on 1:1 dma-ranges mapping [ Upstream commit 7f05e20b989ac33c9c0f8c2028ec0a566493548f ] A missing or empty dma-ranges in a DT node implies a 1:1 mapping for dma translations. In this specific case, the current behaviour is to zero out the entire specifier so that the translation could be carried on as an offset from zero. This includes address specifier that has flags (e.g. PCI ranges). Once the flags portion has been zeroed, the translation chain is broken since the mapping functions will check the upcoming address specifier against mismatching flags, always failing the 1:1 mapping and its entire purpose of always succeeding. Set to zero only the address portion while passing the flags through. 
Fixes: dbbdee94734b ("of/address: Merge all of the bus translation code") Cc: stable@vger.kernel.org Signed-off-by: Andrea della Porta Tested-by: Herve Codina Link: https://lore.kernel.org/r/e51ae57874e58a9b349c35e2e877425ebc075d7a.1732441813.git.andrea.porta@suse.com Signed-off-by: Rob Herring (Arm) Signed-off-by: Sasha Levin (cherry picked from commit b222816f9c431e9303bf466a343e4db4c1942b40) --- drivers/of/address.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index cdefe5a89e5d2..34d880a1be0a5 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -476,7 +476,8 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, } if (ranges == NULL || rlen == 0) { offset = of_read_number(addr, na); - memset(addr, 0, pna * 4); + /* set address to zero, pass flags through */ + memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4); pr_debug("empty ranges; 1:1 translation\n"); goto finish; } From 53c855dc18362d5df36844b99a439be4767142a0 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 31 May 2024 09:57:19 +0300 Subject: [PATCH 077/216] watchdog: rzg2l_wdt: Remove reset de-assert from probe [ Upstream commit 064319c3fac88e04f53f3460cd24ae90de2d9fb6 ] There is no need to de-assert the reset signal on probe as the watchdog is not used prior to executing start. Also, the clocks are not enabled in probe (pm_runtime_enable() doesn't do that), thus this is another indicator that the watchdog wasn't used previously like this. Instead, keep the watchdog hardware in its previous state at probe (by default it is in reset state), enable it when it is started and move it to reset state when it is stopped. This saves some extra power when the watchdog is unused.
Signed-off-by: Claudiu Beznea Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240531065723.1085423-6-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Stable-dep-of: bad201b2ac4e ("watchdog: rzg2l_wdt: Power on the watchdog domain in the restart handler") Signed-off-by: Sasha Levin (cherry picked from commit 7ea100fb50bbb49991af0704f1575ba8053d90b0) --- drivers/watchdog/rzg2l_wdt.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/watchdog/rzg2l_wdt.c b/drivers/watchdog/rzg2l_wdt.c index 7bce093316c4d..7aad66da138a8 100644 --- a/drivers/watchdog/rzg2l_wdt.c +++ b/drivers/watchdog/rzg2l_wdt.c @@ -129,6 +129,12 @@ static int rzg2l_wdt_start(struct watchdog_device *wdev) if (ret) return ret; + ret = reset_control_deassert(priv->rstc); + if (ret) { + pm_runtime_put(wdev->parent); + return ret; + } + /* Initialize time out */ rzg2l_wdt_init_timeout(wdev); @@ -146,7 +152,9 @@ static int rzg2l_wdt_stop(struct watchdog_device *wdev) struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); int ret; - rzg2l_wdt_reset(priv); + ret = reset_control_assert(priv->rstc); + if (ret) + return ret; ret = pm_runtime_put(wdev->parent); if (ret < 0) @@ -186,6 +194,12 @@ static int rzg2l_wdt_restart(struct watchdog_device *wdev, clk_prepare_enable(priv->osc_clk); if (priv->devtype == WDT_RZG2L) { + int ret; + + ret = reset_control_deassert(priv->rstc); + if (ret) + return ret; + /* Generate Reset (WDTRSTB) Signal on parity error */ rzg2l_wdt_write(priv, 0, PECR); @@ -236,13 +250,11 @@ static const struct watchdog_ops rzg2l_wdt_ops = { .restart = rzg2l_wdt_restart, }; -static void rzg2l_wdt_reset_assert_pm_disable(void *data) +static void rzg2l_wdt_pm_disable(void *data) { struct watchdog_device *wdev = data; - struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); pm_runtime_disable(wdev->parent); - reset_control_assert(priv->rstc); } static int rzg2l_wdt_probe(struct 
platform_device *pdev) @@ -285,10 +297,6 @@ static int rzg2l_wdt_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(priv->rstc), "failed to get cpg reset"); - ret = reset_control_deassert(priv->rstc); - if (ret) - return dev_err_probe(dev, ret, "failed to deassert"); - priv->devtype = (uintptr_t)of_device_get_match_data(dev); if (priv->devtype == WDT_RZV2M) { @@ -309,9 +317,7 @@ static int rzg2l_wdt_probe(struct platform_device *pdev) priv->wdev.timeout = WDT_DEFAULT_TIMEOUT; watchdog_set_drvdata(&priv->wdev, priv); - ret = devm_add_action_or_reset(&pdev->dev, - rzg2l_wdt_reset_assert_pm_disable, - &priv->wdev); + ret = devm_add_action_or_reset(&pdev->dev, rzg2l_wdt_pm_disable, &priv->wdev); if (ret < 0) return ret; From 6e05c656d0ead4a4cbd1550e0de322f97d8e23c2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 31 May 2024 09:57:21 +0300 Subject: [PATCH 078/216] watchdog: rzg2l_wdt: Rely on the reset driver for doing proper reset [ Upstream commit d8997ed79ed7c7c32b2ae571e0d99a58bbfd01fe ] The reset driver has been adapted in commit da235d2fac21 ("clk: renesas: rzg2l: Check reset monitor registers") to check the reset monitor bits before declaring reset asserts/de-asserts as successful/failure operations. With that, there is no need to keep the reset workaround for RZ/V2M in place in the watchdog driver. 
Signed-off-by: Claudiu Beznea Reviewed-by: Philipp Zabel Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240531065723.1085423-8-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Stable-dep-of: bad201b2ac4e ("watchdog: rzg2l_wdt: Power on the watchdog domain in the restart handler") Signed-off-by: Sasha Levin (cherry picked from commit e145b77fb5c1829bfd320f839bfea006a5878b0a) --- drivers/watchdog/rzg2l_wdt.c | 39 ++++-------------------------------- 1 file changed, 4 insertions(+), 35 deletions(-) diff --git a/drivers/watchdog/rzg2l_wdt.c b/drivers/watchdog/rzg2l_wdt.c index 7aad66da138a8..d09f938415fcb 100644 --- a/drivers/watchdog/rzg2l_wdt.c +++ b/drivers/watchdog/rzg2l_wdt.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -54,35 +53,11 @@ struct rzg2l_wdt_priv { struct reset_control *rstc; unsigned long osc_clk_rate; unsigned long delay; - unsigned long minimum_assertion_period; struct clk *pclk; struct clk *osc_clk; enum rz_wdt_type devtype; }; -static int rzg2l_wdt_reset(struct rzg2l_wdt_priv *priv) -{ - int err, status; - - if (priv->devtype == WDT_RZV2M) { - /* WDT needs TYPE-B reset control */ - err = reset_control_assert(priv->rstc); - if (err) - return err; - ndelay(priv->minimum_assertion_period); - err = reset_control_deassert(priv->rstc); - if (err) - return err; - err = read_poll_timeout(reset_control_status, status, - status != 1, 0, 1000, false, - priv->rstc); - } else { - err = reset_control_reset(priv->rstc); - } - - return err; -} - static void rzg2l_wdt_wait_delay(struct rzg2l_wdt_priv *priv) { /* delay timer when change the setting register */ @@ -189,13 +164,12 @@ static int rzg2l_wdt_restart(struct watchdog_device *wdev, unsigned long action, void *data) { struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); + int ret; clk_prepare_enable(priv->pclk); clk_prepare_enable(priv->osc_clk); if (priv->devtype == WDT_RZG2L) { - int ret; - ret = 
reset_control_deassert(priv->rstc); if (ret) return ret; @@ -207,7 +181,9 @@ static int rzg2l_wdt_restart(struct watchdog_device *wdev, rzg2l_wdt_write(priv, PEEN_FORCE, PEEN); } else { /* RZ/V2M doesn't have parity error registers */ - rzg2l_wdt_reset(priv); + ret = reset_control_reset(priv->rstc); + if (ret) + return ret; wdev->timeout = 0; @@ -299,13 +275,6 @@ static int rzg2l_wdt_probe(struct platform_device *pdev) priv->devtype = (uintptr_t)of_device_get_match_data(dev); - if (priv->devtype == WDT_RZV2M) { - priv->minimum_assertion_period = RZV2M_A_NSEC + - 3 * F2CYCLE_NSEC(pclk_rate) + 5 * - max(F2CYCLE_NSEC(priv->osc_clk_rate), - F2CYCLE_NSEC(pclk_rate)); - } - pm_runtime_enable(&pdev->dev); priv->wdev.info = &rzg2l_wdt_ident; From 6c8703932e4bdeede42b1a924a58989e09b024f6 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 15 Oct 2024 19:47:32 +0300 Subject: [PATCH 079/216] watchdog: rzg2l_wdt: Power on the watchdog domain in the restart handler [ Upstream commit bad201b2ac4e238c6d4b6966a220240e3861640c ] On RZ/G3S the watchdog can be part of a software-controlled PM domain. In this case, the watchdog device needs to be powered on in struct watchdog_ops::restart API. This can be done through pm_runtime_resume_and_get() API if the watchdog PM domain and watchdog device are marked as IRQ safe. We mark the watchdog PM domain as IRQ safe with GENPD_FLAG_IRQ_SAFE when the watchdog PM domain is registered and the watchdog device through pm_runtime_irq_safe(). Before commit e4cf89596c1f ("watchdog: rzg2l_wdt: Fix 'BUG: Invalid wait context'") pm_runtime_get_sync() was used in watchdog restart handler (which is similar to pm_runtime_resume_and_get() except the latter one handles the runtime resume errors). Commit e4cf89596c1f ("watchdog: rzg2l_wdt: Fix 'BUG: Invalid wait context'") dropped the pm_runtime_get_sync() and replaced it with clk_prepare_enable() to avoid invalid wait context due to genpd_lock() in genpd_runtime_resume() being called from atomic context.
But clk_prepare_enable() doesn't fit for this either (as reported by Ulf Hansson) as clk_prepare() can also sleep (it just does not throw an invalid wait context warning as it is not written for this). Because the watchdog device is marked now as IRQ safe (through this patch) the irq_safe_dev_in_sleep_domain() call from genpd_runtime_resume() returns 1 for devices not registering an IRQ safe PM domain for watchdog (as the watchdog device is IRQ safe, PM domain is not and watchdog PM domain is always-on), this being the case for RZ/G3S with old device trees and the rest of the SoCs that use this driver, we can now drop also the clk_prepare_enable() calls in restart handler and rely on pm_runtime_resume_and_get(). Thus, drop clk_prepare_enable() and use pm_runtime_resume_and_get() in watchdog restart handler. Signed-off-by: Claudiu Beznea Reviewed-by: Ulf Hansson Reviewed-by: Geert Uytterhoeven Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20241015164732.4085249-5-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin (cherry picked from commit d33523b0beb5dd5a82165da0aeede9283d4c0ff9) --- drivers/watchdog/rzg2l_wdt.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/rzg2l_wdt.c b/drivers/watchdog/rzg2l_wdt.c index d09f938415fcb..525a72d8d746e 100644 --- a/drivers/watchdog/rzg2l_wdt.c +++ b/drivers/watchdog/rzg2l_wdt.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -166,8 +167,22 @@ static int rzg2l_wdt_restart(struct watchdog_device *wdev, struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev); int ret; - clk_prepare_enable(priv->pclk); - clk_prepare_enable(priv->osc_clk); + /* + * In case of RZ/G3S the watchdog device may be part of an IRQ safe power + * domain that is currently powered off. In this case we need to power + * it on before accessing registers. Along with this the clocks will be + * enabled.
We don't undo the pm_runtime_resume_and_get() as the device + * need to be on for the reboot to happen. + * + * For the rest of SoCs not registering a watchdog IRQ safe power + * domain it is safe to call pm_runtime_resume_and_get() as the + * irq_safe_dev_in_sleep_domain() call in genpd_runtime_resume() + * returns non zero value and the genpd_lock() is avoided, thus, there + * will be no invalid wait context reported by lockdep. + */ + ret = pm_runtime_resume_and_get(wdev->parent); + if (ret) + return ret; if (priv->devtype == WDT_RZG2L) { ret = reset_control_deassert(priv->rstc); @@ -275,6 +290,7 @@ static int rzg2l_wdt_probe(struct platform_device *pdev) priv->devtype = (uintptr_t)of_device_get_match_data(dev); + pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); priv->wdev.info = &rzg2l_wdt_ident; From c075cc666a8004fe3ce37a3e0fae84738cb00494 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Oct 2023 14:44:23 -0400 Subject: [PATCH 080/216] udf_rename(): only access the child content on cross-directory rename [ Upstream commit 9d35cebb794bb7be93db76c3383979c7deacfef9 ] We can't really afford locking the source on same-directory rename; currently vfs_rename() tries to do that, but it will have to be changed. The logic in udf_rename() is lazy and goes looking for ".." in source even in same-directory case. It's not hard to get rid of that, leaving that behaviour only for cross-directory case; that VFS can get locks safely (and will keep doing that after the coming changes).
Reviewed-by: Jan Kara Signed-off-by: Al Viro Stable-dep-of: 6756af923e06 ("udf: Verify inode link counts before performing rename") Signed-off-by: Sasha Levin (cherry picked from commit 17b312c5d86909bd53da5a49cd4589a17f97c439) --- fs/udf/namei.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index b3f57ad2b869f..0461a7b1e9b41 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -770,7 +770,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct udf_fileident_iter oiter, niter, diriter; - bool has_diriter = false; + bool has_diriter = false, is_dir = false; int retval; struct kernel_lb_addr tloc; @@ -793,6 +793,9 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (!empty_dir(new_inode)) goto out_oiter; } + is_dir = true; + } + if (is_dir && old_dir != new_dir) { retval = udf_fiiter_find_entry(old_inode, &dotdot_name, &diriter); if (retval == -ENOENT) { @@ -880,7 +883,9 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, cpu_to_lelb(UDF_I(new_dir)->i_location); udf_fiiter_write_fi(&diriter, NULL); udf_fiiter_release(&diriter); + } + if (is_dir) { inode_dec_link_count(old_dir); if (new_inode) inode_dec_link_count(new_inode); From 7cf17d713ab39c4c78852b6be7706c4a326f210a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 26 Nov 2024 12:55:12 +0100 Subject: [PATCH 081/216] udf: Verify inode link counts before performing rename [ Upstream commit 6756af923e06aa33ad8894aaecbf9060953ba00f ] During rename, we are updating link counts of various inodes either when rename deletes target or when moving directory across directories. Verify involved link counts are sane so that we don't trip warnings in VFS. 
Reported-by: syzbot+3ff7365dc04a6bcafa66@syzkaller.appspotmail.com Signed-off-by: Jan Kara Signed-off-by: Sasha Levin (cherry picked from commit b41d730552840397262294657a2ad8de82aaebd2) --- fs/udf/namei.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 0461a7b1e9b41..8ac73f41d6ebe 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -792,8 +792,18 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, retval = -ENOTEMPTY; if (!empty_dir(new_inode)) goto out_oiter; + retval = -EFSCORRUPTED; + if (new_inode->i_nlink != 2) + goto out_oiter; } + retval = -EFSCORRUPTED; + if (old_dir->i_nlink < 3) + goto out_oiter; is_dir = true; + } else if (new_inode) { + retval = -EFSCORRUPTED; + if (new_inode->i_nlink < 1) + goto out_oiter; } if (is_dir && old_dir != new_dir) { retval = udf_fiiter_find_entry(old_inode, &dotdot_name, From 90a0b6dc356bc0646aed3c2d021a2b7b4891050c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Feb 2024 09:52:43 +0100 Subject: [PATCH 082/216] ALSA: ump: Use guard() for locking [ Upstream commit 631896f7eaaf8cf8c639b065d3c9fbaa66da5d32 ] We can simplify the code gracefully with new guard() macro and co for automatic cleanup of locks. Only the code refactoring, and no functional changes. 
Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240227085306.9764-2-tiwai@suse.de Stable-dep-of: 3978d53df723 ("ALSA: ump: Don't open legacy substream for an inactive group") Signed-off-by: Sasha Levin (cherry picked from commit b5e175e18a39c3190134dd3174d532a59c766a23) --- sound/core/ump.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index 83856b2f88b89..ada2625ce78f7 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1076,13 +1076,11 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) struct snd_ump_endpoint *ump = substream->rmidi->private_data; int dir = substream->stream; int group = ump->legacy_mapping[substream->number]; - int err = 0; + int err; - mutex_lock(&ump->open_mutex); - if (ump->legacy_substreams[dir][group]) { - err = -EBUSY; - goto unlock; - } + guard(mutex)(&ump->open_mutex); + if (ump->legacy_substreams[dir][group]) + return -EBUSY; if (dir == SNDRV_RAWMIDI_STREAM_OUTPUT) { if (!ump->legacy_out_opens) { err = snd_rawmidi_kernel_open(&ump->core, 0, @@ -1090,17 +1088,14 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) SNDRV_RAWMIDI_LFLG_APPEND, &ump->legacy_out_rfile); if (err < 0) - goto unlock; + return err; } ump->legacy_out_opens++; snd_ump_convert_reset(&ump->out_cvts[group]); } - spin_lock_irq(&ump->legacy_locks[dir]); + guard(spinlock_irq)(&ump->legacy_locks[dir]); ump->legacy_substreams[dir][group] = substream; - spin_unlock_irq(&ump->legacy_locks[dir]); - unlock: - mutex_unlock(&ump->open_mutex); - return err; + return 0; } static int snd_ump_legacy_close(struct snd_rawmidi_substream *substream) @@ -1109,15 +1104,13 @@ static int snd_ump_legacy_close(struct snd_rawmidi_substream *substream) int dir = substream->stream; int group = ump->legacy_mapping[substream->number]; - mutex_lock(&ump->open_mutex); - spin_lock_irq(&ump->legacy_locks[dir]); - 
ump->legacy_substreams[dir][group] = NULL; - spin_unlock_irq(&ump->legacy_locks[dir]); + guard(mutex)(&ump->open_mutex); + scoped_guard(spinlock_irq, &ump->legacy_locks[dir]) + ump->legacy_substreams[dir][group] = NULL; if (dir == SNDRV_RAWMIDI_STREAM_OUTPUT) { if (!--ump->legacy_out_opens) snd_rawmidi_kernel_release(&ump->legacy_out_rfile); } - mutex_unlock(&ump->open_mutex); return 0; } @@ -1169,12 +1162,11 @@ static int process_legacy_output(struct snd_ump_endpoint *ump, const int dir = SNDRV_RAWMIDI_STREAM_OUTPUT; unsigned char c; int group, size = 0; - unsigned long flags; if (!ump->out_cvts || !ump->legacy_out_opens) return 0; - spin_lock_irqsave(&ump->legacy_locks[dir], flags); + guard(spinlock_irqsave)(&ump->legacy_locks[dir]); for (group = 0; group < SNDRV_UMP_MAX_GROUPS; group++) { substream = ump->legacy_substreams[dir][group]; if (!substream) @@ -1190,7 +1182,6 @@ static int process_legacy_output(struct snd_ump_endpoint *ump, break; } } - spin_unlock_irqrestore(&ump->legacy_locks[dir], flags); return size; } @@ -1200,18 +1191,16 @@ static void process_legacy_input(struct snd_ump_endpoint *ump, const u32 *src, struct snd_rawmidi_substream *substream; unsigned char buf[16]; unsigned char group; - unsigned long flags; const int dir = SNDRV_RAWMIDI_STREAM_INPUT; int size; size = snd_ump_convert_from_ump(src, buf, &group); if (size <= 0) return; - spin_lock_irqsave(&ump->legacy_locks[dir], flags); + guard(spinlock_irqsave)(&ump->legacy_locks[dir]); substream = ump->legacy_substreams[dir][group]; if (substream) snd_rawmidi_receive(substream, buf, size); - spin_unlock_irqrestore(&ump->legacy_locks[dir], flags); } /* Fill ump->legacy_mapping[] for groups to be used for legacy rawmidi */ From d6185a3a4d416eed1eb2b957bcb912b64168d536 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2024 10:45:42 +0100 Subject: [PATCH 083/216] ALSA: ump: Don't open legacy substream for an inactive group [ Upstream commit 3978d53df7236f0a517c2abeb43ddf6ac162cdd8 ] When 
a UMP Group is inactive, we shouldn't allow users to access it via the legacy MIDI access. Add the group active flag check and return -ENODEV if it's inactive. Link: https://patch.msgid.link/20241129094546.32119-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit cf29cbf61cf2d914d4877b5e09c47cb19e744e3d) --- sound/core/ump.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/ump.c b/sound/core/ump.c index ada2625ce78f7..5a4a7d0b7cca4 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1081,6 +1081,8 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) guard(mutex)(&ump->open_mutex); if (ump->legacy_substreams[dir][group]) return -EBUSY; + if (!ump->groups[group].active) + return -ENODEV; if (dir == SNDRV_RAWMIDI_STREAM_OUTPUT) { if (!ump->legacy_out_opens) { err = snd_rawmidi_kernel_open(&ump->core, 0, From 026198e58d474f016f818b09ec2178d53f7ac483 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2024 10:45:43 +0100 Subject: [PATCH 084/216] ALSA: ump: Indicate the inactive group in legacy substream names [ Upstream commit e29e504e7890b9ee438ca6370d0180d607c473f9 ] Since the legacy rawmidi has no proper way to know the inactive group, indicate it in the rawmidi substream names with "[Inactive]" suffix when the corresponding UMP group is inactive. 
Link: https://patch.msgid.link/20241129094546.32119-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit 9617001adfc9b402d7d80ef53c4cebf0c94624d7) --- sound/core/ump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index 5a4a7d0b7cca4..bb94f119869a1 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1245,8 +1245,9 @@ static void fill_substream_names(struct snd_ump_endpoint *ump, name = ump->groups[idx].name; if (!*name) name = ump->info.name; - snprintf(s->name, sizeof(s->name), "Group %d (%.16s)", - idx + 1, name); + snprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s", + idx + 1, name, + ump->groups[idx].active ? "" : " [Inactive]"); } } From 0dcadf06a4e5c797d7633bc9dd5ad072bce2fe75 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2024 10:45:44 +0100 Subject: [PATCH 085/216] ALSA: ump: Update legacy substream names upon FB info update [ Upstream commit edad3f9519fcacb926d0e3f3217aecaf628a593f ] The legacy rawmidi substreams should be updated when UMP FB Info or UMP FB Name are received, too. 
Link: https://patch.msgid.link/20241129094546.32119-4-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit 8d891c866cf714b54165d56ad2afc3b4d708d053) --- sound/core/ump.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index bb94f119869a1..4aec90dac07ee 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -37,6 +37,7 @@ static int process_legacy_output(struct snd_ump_endpoint *ump, u32 *buffer, int count); static void process_legacy_input(struct snd_ump_endpoint *ump, const u32 *src, int words); +static void update_legacy_names(struct snd_ump_endpoint *ump); #else static inline int process_legacy_output(struct snd_ump_endpoint *ump, u32 *buffer, int count) @@ -47,6 +48,9 @@ static inline void process_legacy_input(struct snd_ump_endpoint *ump, const u32 *src, int words) { } +static inline void update_legacy_names(struct snd_ump_endpoint *ump) +{ +} #endif static const struct snd_rawmidi_global_ops snd_ump_rawmidi_ops = { @@ -850,6 +854,7 @@ static int ump_handle_fb_info_msg(struct snd_ump_endpoint *ump, fill_fb_info(ump, &fb->info, buf); if (ump->parsed) { snd_ump_update_group_attrs(ump); + update_legacy_names(ump); seq_notify_fb_change(ump, fb); } } @@ -882,6 +887,7 @@ static int ump_handle_fb_name_msg(struct snd_ump_endpoint *ump, /* notify the FB name update to sequencer, too */ if (ret > 0 && ump->parsed) { snd_ump_update_group_attrs(ump); + update_legacy_names(ump); seq_notify_fb_change(ump, fb); } return ret; @@ -1251,6 +1257,14 @@ static void fill_substream_names(struct snd_ump_endpoint *ump, } } +static void update_legacy_names(struct snd_ump_endpoint *ump) +{ + struct snd_rawmidi *rmidi = ump->legacy_rmidi; + + fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT); + fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT); +} + int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, char *id, int device) { @@ -1287,10 
+1301,7 @@ int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, rmidi->ops = &snd_ump_legacy_ops; rmidi->private_data = ump; ump->legacy_rmidi = rmidi; - if (input) - fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT); - if (output) - fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT); + update_legacy_names(ump); ump_dbg(ump, "Created a legacy rawmidi #%d (%s)\n", device, id); return 0; From 425fdadb3e009019bd7a578050666b5bd2288fce Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 29 Dec 2023 12:03:31 +0800 Subject: [PATCH 086/216] scsi: mpi3mr: Use ida to manage mrioc ID [ Upstream commit 29b75184f721b16c51ef6e67eec0e40ed88381c7 ] To ensure that the same ID is not obtained during concurrent execution of the probe, an ida is used to manage the mrioc's ID. Signed-off-by: Guixin Liu Link: https://lore.kernel.org/r/20231229040331.52518-1-kanie@linux.alibaba.com Reviewed-by: Lee Duncan Reviewed-by: Martin Wilck Signed-off-by: Martin K. Petersen Stable-dep-of: 0d32014f1e3e ("scsi: mpi3mr: Start controller indexing from 0") Signed-off-by: Sasha Levin (cherry picked from commit d424303d8d18392a43a38458aa86340d30be99aa) --- drivers/scsi/mpi3mr/mpi3mr_os.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 7f32619234696..3f86f1d0a9bea 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -8,11 +8,12 @@ */ #include "mpi3mr.h" +#include /* global driver scop variables */ LIST_HEAD(mrioc_list); DEFINE_SPINLOCK(mrioc_list_lock); -static int mrioc_ids; +static DEFINE_IDA(mrioc_ida); static int warn_non_secure_ctlr; atomic64_t event_counter; @@ -5065,7 +5066,10 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) } mrioc = shost_priv(shost); - mrioc->id = mrioc_ids++; + retval = ida_alloc_range(&mrioc_ida, 1, U8_MAX, GFP_KERNEL); + if (retval < 0) + goto id_alloc_failed; + mrioc->id = (u8)retval; 
sprintf(mrioc->driver_name, "%s", MPI3MR_DRIVER_NAME); sprintf(mrioc->name, "%s%d", mrioc->driver_name, mrioc->id); INIT_LIST_HEAD(&mrioc->list); @@ -5215,9 +5219,11 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) resource_alloc_failed: destroy_workqueue(mrioc->fwevt_worker_thread); fwevtthread_failed: + ida_free(&mrioc_ida, mrioc->id); spin_lock(&mrioc_list_lock); list_del(&mrioc->list); spin_unlock(&mrioc_list_lock); +id_alloc_failed: scsi_host_put(shost); shost_failed: return retval; @@ -5303,6 +5309,7 @@ static void mpi3mr_remove(struct pci_dev *pdev) mrioc->sas_hba.num_phys = 0; } + ida_free(&mrioc_ida, mrioc->id); spin_lock(&mrioc_list_lock); list_del(&mrioc->list); spin_unlock(&mrioc_list_lock); @@ -5518,6 +5525,7 @@ static void __exit mpi3mr_exit(void) &driver_attr_event_counter); pci_unregister_driver(&mpi3mr_pci_driver); sas_release_transport(mpi3mr_transport_template); + ida_destroy(&mrioc_ida); } module_init(mpi3mr_init); From 8302306c1ed40c1a3796456fbda81e298ad79f29 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Mon, 11 Nov 2024 01:14:03 +0530 Subject: [PATCH 087/216] scsi: mpi3mr: Start controller indexing from 0 [ Upstream commit 0d32014f1e3e7a7adf1583c45387f26b9bb3a49d ] Instead of displaying the controller index starting from '1' make the driver display the controller index starting from '0'. Signed-off-by: Sumit Saxena Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20241110194405.10108-4-ranjan.kumar@broadcom.com Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin (cherry picked from commit f5a20424084f065c87d7e6b07de5000bcc819a38) --- drivers/scsi/mpi3mr/mpi3mr_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 3f86f1d0a9bea..7880675a68dba 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5066,7 +5066,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) } mrioc = shost_priv(shost); - retval = ida_alloc_range(&mrioc_ida, 1, U8_MAX, GFP_KERNEL); + retval = ida_alloc_range(&mrioc_ida, 0, U8_MAX, GFP_KERNEL); if (retval < 0) goto id_alloc_failed; mrioc->id = (u8)retval; From 283acd87efd9a408ca07173045853ea0bb962752 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 31 Jul 2024 17:26:58 +0800 Subject: [PATCH 088/216] ACPI/IORT: Add PMCG platform information for HiSilicon HIP10/11 [ Upstream commit f3b78b470f28bb2a3a40e88bdf5c6de6a35a9b76 ] HiSilicon HIP10/11 platforms use the same SMMU PMCG as HIP09 and thus suffer from the same erratum. List them in the PMCG platform information list without introducing a new SMMU PMCG Model. Update the silicon-errata.rst as well. Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240731092658.11012-1-yangyicong@huawei.com Signed-off-by: Will Deacon Stable-dep-of: c2b46ae02270 ("ACPI/IORT: Add PMCG platform information for HiSilicon HIP09A") Signed-off-by: Sasha Levin (cherry picked from commit 4252d023bae7d53ab4d75523e6655c1619782264) --- Documentation/arch/arm64/silicon-errata.rst | 4 ++-- drivers/acpi/arm64/iort.c | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 3cf806733083c..f4e6afd596308 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -244,8 +244,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ -| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A | -| | Hip09 SMMU PMCG | | | +| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A | +| | ,11} SMMU PMCG | | | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 6496ff5a6ba20..b1f483845bc0c 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1712,6 +1712,13 @@ static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip09 Platform */ {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + /* HiSilicon Hip10/11 Platform uses the same SMMU IP with Hip09 */ + {"HISI ", "HIP10 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP10C ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP11 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, { } }; From 0a1f270eb7f8e39a99b68d157ab5a51963b8fa8e Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Thu, 5 Dec 2024 09:33:31 +0800 Subject: [PATCH 089/216] ACPI/IORT: Add PMCG platform information for HiSilicon HIP09A [ Upstream commit c2b46ae022704a2d845e59461fa24431ad627022 ] HiSilicon HIP09A platforms using the same SMMU PMCG with HIP09 and thus suffers the same erratum. List them in the PMCG platform information list without introducing a new SMMU PMCG Model. Update the silicon-errata.rst as well. 
Reviewed-by: Yicong Yang Acked-by: Hanjun Guo Signed-off-by: Qinxin Xia Link: https://lore.kernel.org/r/20241205013331.1484017-1-xiaqinxin@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin (cherry picked from commit 48417c3426cf060a45b15f1c201e033542abb7e7) --- Documentation/arch/arm64/silicon-errata.rst | 5 +++-- drivers/acpi/arm64/iort.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index f4e6afd596308..8209c7a7c3970 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -244,8 +244,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ -| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A | -| | ,11} SMMU PMCG | | | +| Hisilicon | Hip{08,09,09A,10| #162001900 | N/A | +| | ,10C,11} | | | +| | SMMU PMCG | | | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. 
| Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index b1f483845bc0c..1a31106a14e44 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1712,6 +1712,8 @@ static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip09 Platform */ {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP09A ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, /* HiSilicon Hip10/11 Platform uses the same SMMU IP with Hip09 */ {"HISI ", "HIP10 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, From 7d27e4492ad575d42496170c5007c9ff4c384306 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Dec 2023 02:50:02 -0800 Subject: [PATCH 090/216] x86/ptrace: Cleanup the definition of the pt_regs structure [ Upstream commit ee63291aa8287cb7ded767d340155fe8681fc075 ] struct pt_regs is hard to read because the member or section related comments are not aligned with the members. The 'cs' and 'ss' members of pt_regs are type of 'unsigned long' while in reality they are only 16-bit wide. This works so far as the remaining space is unused, but FRED will use the remaining bits for other purposes. To prepare for FRED: - Cleanup the formatting - Convert 'cs' and 'ss' to u16 and embed them into an union with a u64 - Fixup the related printk() format strings Suggested-by: Thomas Gleixner Originally-by: H. 
Peter Anvin (Intel) Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Shan Kang Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com Stable-dep-of: dc81e556f2a0 ("x86/fred: Clear WFE in missing-ENDBRANCH #CPs") Signed-off-by: Sasha Levin (cherry picked from commit 498bdedca58aab8360e8a75208dde175770d00a5) --- arch/x86/entry/vsyscall/vsyscall_64.c | 2 +- arch/x86/include/asm/ptrace.h | 48 +++++++++++++++++++-------- arch/x86/kernel/process_64.c | 2 +- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 1245000a8792f..2fb7d53cf3338 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, if (!show_unhandled_signals) return; - printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n", level, current->comm, task_pid_nr(current), message, regs->ip, regs->cs, regs->sp, regs->ax, regs->si, regs->di); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index f4db78b09c8f0..b268cd2a2d01c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -57,17 +57,19 @@ struct pt_regs { #else /* __i386__ */ struct pt_regs { -/* - * C ABI says these regs are callee-preserved. They aren't saved on kernel entry - * unless syscall needs a complete, fully filled "struct pt_regs". - */ + /* + * C ABI says these regs are callee-preserved. They aren't saved on + * kernel entry unless syscall needs a complete, fully filled + * "struct pt_regs". + */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long bp; unsigned long bx; -/* These regs are callee-clobbered. Always saved on kernel entry. 
*/ + + /* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; @@ -77,18 +79,38 @@ struct pt_regs { unsigned long dx; unsigned long si; unsigned long di; -/* - * On syscall entry, this is syscall#. On CPU exception, this is error code. - * On hw interrupt, it's IRQ number: - */ + + /* + * orig_ax is used on entry for: + * - the syscall number (syscall, sysenter, int80) + * - error_code stored by the CPU on traps and exceptions + * - the interrupt number for device interrupts + */ unsigned long orig_ax; -/* Return frame for iretq */ + + /* The IRETQ return frame starts here */ unsigned long ip; - unsigned long cs; + + union { + /* The full 64-bit data slot containing CS */ + u64 csx; + /* CS selector */ + u16 cs; + }; + unsigned long flags; unsigned long sp; - unsigned long ss; -/* top of stack page */ + + union { + /* The full 64-bit data slot containing SS */ + u64 ssx; + /* SS selector */ + u16 ss; + }; + + /* + * Top of stack on IDT systems. 
+ */ }; #endif /* !__i386__ */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d595ef7c1de05..dd19a4db741af 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", log_lvl, fs, fsindex, gs, gsindex, shadowgs); - printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n", + printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n", log_lvl, regs->cs, ds, es, cr0); printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n", log_lvl, cr2, cr3, cr4); From ca5d088a892620a33cdd6bd6870bc30becfa4fab Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Dec 2023 02:50:03 -0800 Subject: [PATCH 091/216] x86/ptrace: Add FRED additional information to the pt_regs structure [ Upstream commit 3c77bf02d0c03beb3efdf7a5b427fb2e1a76c265 ] FRED defines additional information in the upper 48 bits of cs/ss fields. Therefore add the information definitions into the pt_regs structure. Specifically introduce a new structure fred_ss to denote the FRED flags above SS selector, which avoids FRED_SSX_ macros and makes the code simpler and easier to read. Suggested-by: Thomas Gleixner Originally-by: H. 
Peter Anvin (Intel) Signed-off-by: Xin Li Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Shan Kang Link: https://lore.kernel.org/r/20231205105030.8698-15-xin3.li@intel.com Stable-dep-of: dc81e556f2a0 ("x86/fred: Clear WFE in missing-ENDBRANCH #CPs") Signed-off-by: Sasha Levin (cherry picked from commit 9c268be377e78cab2a48ee9d044c4d391a64debf) --- arch/x86/include/asm/ptrace.h | 66 ++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index b268cd2a2d01c..5a83fbd9bc0b4 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -56,6 +56,50 @@ struct pt_regs { #else /* __i386__ */ +struct fred_cs { + /* CS selector */ + u64 cs : 16, + /* Stack level at event time */ + sl : 2, + /* IBT in WAIT_FOR_ENDBRANCH state */ + wfe : 1, + : 45; +}; + +struct fred_ss { + /* SS selector */ + u64 ss : 16, + /* STI state */ + sti : 1, + /* Set if syscall, sysenter or INT n */ + swevent : 1, + /* Event is NMI type */ + nmi : 1, + : 13, + /* Event vector */ + vector : 8, + : 8, + /* Event type */ + type : 4, + : 4, + /* Event was incident to enclave execution */ + enclave : 1, + /* CPU was in long mode */ + lm : 1, + /* + * Nested exception during FRED delivery, not set + * for #DF. + */ + nested : 1, + : 1, + /* + * The length of the instruction causing the event. + * Only set for INTO, INT1, INT3, INT n, SYSCALL + * and SYSENTER. 0 otherwise. + */ + insnlen : 4; +}; + struct pt_regs { /* * C ABI says these regs are callee-preserved. They aren't saved on @@ -85,6 +129,12 @@ struct pt_regs { * - the syscall number (syscall, sysenter, int80) * - error_code stored by the CPU on traps and exceptions * - the interrupt number for device interrupts + * + * A FRED stack frame starts here: + * 1) It _always_ includes an error code; + * + * 2) The return frame for ERET[US] starts here, but + * the content of orig_ax is ignored. 
*/ unsigned long orig_ax; @@ -92,24 +142,30 @@ struct pt_regs { unsigned long ip; union { - /* The full 64-bit data slot containing CS */ - u64 csx; /* CS selector */ u16 cs; + /* The extended 64-bit data slot containing CS */ + u64 csx; + /* The FRED CS extension */ + struct fred_cs fred_cs; }; unsigned long flags; unsigned long sp; union { - /* The full 64-bit data slot containing SS */ - u64 ssx; /* SS selector */ u16 ss; + /* The extended 64-bit data slot containing SS */ + u64 ssx; + /* The FRED SS extension */ + struct fred_ss fred_ss; }; /* - * Top of stack on IDT systems. + * Top of stack on IDT systems, while FRED systems have extra fields + * defined above for storing exception related information, e.g. CR2 or + * DR6. */ }; From 1223c4903f7de2abfe135b551f0da45a34c19c65 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Wed, 13 Nov 2024 09:59:34 -0800 Subject: [PATCH 092/216] x86/fred: Clear WFE in missing-ENDBRANCH #CPs [ Upstream commit dc81e556f2a017d681251ace21bf06c126d5a192 ] An indirect branch instruction sets the CPU indirect branch tracker (IBT) into WAIT_FOR_ENDBRANCH (WFE) state and WFE stays asserted across the instruction boundary. When the decoder finds an inappropriate instruction while WFE is set ENDBR, the CPU raises a #CP fault. For the "kernel IBT no ENDBR" selftest where #CPs are deliberately triggered, the WFE state of the interrupted context needs to be cleared to let execution continue. Otherwise when the CPU resumes from the instruction that just caused the previous #CP, another missing-ENDBRANCH #CP is raised and the CPU enters a dead loop. This is not a problem with IDT because it doesn't preserve WFE and IRET doesn't set WFE. But FRED provides space on the entry stack (in an expanded CS area) to save and restore the WFE state, thus the WFE state is no longer clobbered, so software must clear it. Clear WFE to avoid dead looping in ibt_clear_fred_wfe() and the !ibt_fatal code path when execution is allowed to continue. 
Clobbering WFE in any other circumstance is a security-relevant bug. [ dhansen: changelog rewording ] Fixes: a5f6c2ace997 ("x86/shstk: Add user control-protection fault handler") Signed-off-by: Xin Li (Intel) Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241113175934.3897541-1-xin%40zytor.com Signed-off-by: Sasha Levin (cherry picked from commit 151447859d6fb0dcce8259f0971c6e94fb801661) --- arch/x86/kernel/cet.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c index d2c732a34e5d9..303bf74d175b3 100644 --- a/arch/x86/kernel/cet.c +++ b/arch/x86/kernel/cet.c @@ -81,6 +81,34 @@ static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code) static __ro_after_init bool ibt_fatal = true; +/* + * By definition, all missing-ENDBRANCH #CPs are a result of WFE && !ENDBR. + * + * For the kernel IBT no ENDBR selftest where #CPs are deliberately triggered, + * the WFE state of the interrupted context needs to be cleared to let execution + * continue. Otherwise when the CPU resumes from the instruction that just + * caused the previous #CP, another missing-ENDBRANCH #CP is raised and the CPU + * enters a dead loop. + * + * This is not a problem with IDT because it doesn't preserve WFE and IRET doesn't + * set WFE. But FRED provides space on the entry stack (in an expanded CS area) + * to save and restore the WFE state, thus the WFE state is no longer clobbered, + * so software must clear it. + */ +static void ibt_clear_fred_wfe(struct pt_regs *regs) +{ + /* + * No need to do any FRED checks. + * + * For IDT event delivery, the high-order 48 bits of CS are pushed + * as 0s into the stack, and later IRET ignores these bits. + * + * For FRED, a test to check if fred_cs.wfe is set would be dropped + * by compilers. 
+ */ + regs->fred_cs.wfe = 0; +} + static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) { if ((error_code & CP_EC) != CP_ENDBR) { @@ -90,6 +118,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) { regs->ax = 0; + ibt_clear_fred_wfe(regs); return; } @@ -97,6 +126,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (!ibt_fatal) { printk(KERN_DEFAULT CUT_HERE); __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL); + ibt_clear_fred_wfe(regs); return; } BUG(); From 31fd301f15bd29f51c3f05e3c01a048baa486012 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Sep 2023 12:09:26 +0100 Subject: [PATCH 093/216] btrfs: rename and export __btrfs_cow_block() [ Upstream commit 95f93bc4cbcac6121a5ee85cd5019ee8e7447e0b ] Rename and export __btrfs_cow_block() as btrfs_force_cow_block(). This is to allow to move defrag specific code out of ctree.c and into defrag.c in one of the next patches. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Stable-dep-of: 44f52bbe96df ("btrfs: fix use-after-free when COWing tree bock and tracing is enabled") Signed-off-by: Sasha Levin (cherry picked from commit 0d2cc60b44d09734d053fbb1527408cfb9cf0484) --- fs/btrfs/ctree.c | 30 +++++++++++++++--------------- fs/btrfs/ctree.h | 7 +++++++ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 25c902e7556d5..62032d3fda85e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -526,13 +526,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, * bytes the allocator should try to find free next to the block it returns. * This is just a hint and may be ignored by the allocator. 
*/ -static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *buf, - struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret, - u64 search_start, u64 empty_size, - enum btrfs_lock_nesting nest) +int btrfs_force_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size, + enum btrfs_lock_nesting nest) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_disk_key disk_key; @@ -699,7 +699,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, } /* - * cows a single block, see __btrfs_cow_block for the real work. + * COWs a single block, see btrfs_force_cow_block() for the real work. * This version of it has extra checks so that a block isn't COWed more than * once per transaction, as long as it hasn't been written yet */ @@ -752,8 +752,8 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, * Also We don't care about the error, as it's handled internally. 
*/ btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); - ret = __btrfs_cow_block(trans, root, buf, parent, - parent_slot, cow_ret, search_start, 0, nest); + ret = btrfs_force_cow_block(trans, root, buf, parent, parent_slot, + cow_ret, search_start, 0, nest); trace_btrfs_cow_block(root, buf, *cow_ret); @@ -904,11 +904,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = last_block; btrfs_tree_lock(cur); - err = __btrfs_cow_block(trans, root, cur, parent, i, - &cur, search_start, - min(16 * blocksize, - (end_slot - i) * blocksize), - BTRFS_NESTING_COW); + err = btrfs_force_cow_block(trans, root, cur, parent, i, + &cur, search_start, + min(16 * blocksize, + (end_slot - i) * blocksize), + BTRFS_NESTING_COW); if (err) { btrfs_tree_unlock(cur); free_extent_buffer(cur); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f7bb4c34b984b..7df3ed2945b04 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -538,6 +538,13 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, enum btrfs_lock_nesting nest); +int btrfs_force_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size, + enum btrfs_lock_nesting nest); int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, From 7db3a333c99f82af982fb8efa0ff695b54152ba2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 11 Dec 2024 16:08:07 +0000 Subject: [PATCH 094/216] btrfs: fix use-after-free when COWing tree bock and tracing is enabled [ Upstream commit 44f52bbe96dfdbe4aca3818a2534520082a07040 ] When a COWing a tree block, at btrfs_cow_block(), and we have the tracepoint trace_btrfs_cow_block() enabled and preemption is also enabled (CONFIG_PREEMPT=y), we can trigger a use-after-free in the COWed extent 
buffer while inside the tracepoint code. This is because in some paths that call btrfs_cow_block(), such as btrfs_search_slot(), we are holding the last reference on the extent buffer @buf so btrfs_force_cow_block() drops the last reference on the @buf extent buffer when it calls free_extent_buffer_stale(buf), which schedules the release of the extent buffer with RCU. This means that if we are on a kernel with preemption, the current task may be preempted before calling trace_btrfs_cow_block() and the extent buffer already released by the time trace_btrfs_cow_block() is called, resulting in a use-after-free. Fix this by moving the trace_btrfs_cow_block() from btrfs_cow_block() to btrfs_force_cow_block() before the COWed extent buffer is freed. This also has a side effect of invoking the tracepoint in the tree defrag code, at defrag.c:btrfs_realloc_node(), since btrfs_force_cow_block() is called there, but this is fine and it was actually missing there. Reported-by: syzbot+8517da8635307182c8a5@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/6759a9b9.050a0220.1ac542.000d.GAE@google.com/ CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin (cherry picked from commit 9a466b8693b9add05de99af00c7bdff8259ecf19) --- fs/btrfs/ctree.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 62032d3fda85e..4b21ca49b6665 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -660,6 +660,8 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans, return ret; } } + + trace_btrfs_cow_block(root, buf, cow); if (unlock_orig) btrfs_tree_unlock(buf); free_extent_buffer_stale(buf); @@ -711,7 +713,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; u64 search_start; - int ret; if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) { 
btrfs_abort_transaction(trans, -EUCLEAN); @@ -752,12 +753,8 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, * Also We don't care about the error, as it's handled internally. */ btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); - ret = btrfs_force_cow_block(trans, root, buf, parent, parent_slot, - cow_ret, search_start, 0, nest); - - trace_btrfs_cow_block(root, buf, *cow_ret); - - return ret; + return btrfs_force_cow_block(trans, root, buf, parent, parent_slot, + cow_ret, search_start, 0, nest); } ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO); From cee919402eac542d2e7160a2fe4f6e4f374e5ce9 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 23 Sep 2024 16:47:03 +0800 Subject: [PATCH 095/216] Bluetooth: btusb: mediatek: add callback function in btusb_disconnect [ Upstream commit cea1805f165cdd783dd21f26df957118cb8641b4 ] Add disconnect callback function in btusb_disconnect which is reserved for vendor specific usage before deregister hci in btusb_disconnect. Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin (cherry picked from commit 9457d783fb94527c215e32a97225aed7c88d979f) --- drivers/bluetooth/btusb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 58599c86f8953..9ecd6c48aa8b1 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -873,6 +873,7 @@ struct btusb_data { int (*suspend)(struct hci_dev *hdev); int (*resume)(struct hci_dev *hdev); + int (*disconnect)(struct hci_dev *hdev); int oob_wake_irq; /* irq for out-of-band wake-on-bt */ unsigned cmd_timeout_cnt; @@ -4042,6 +4043,9 @@ static void btusb_disconnect(struct usb_interface *intf) if (data->diag) usb_set_intfdata(data->diag, NULL); + if (data->disconnect) + data->disconnect(hdev); + hci_unregister_dev(hdev); if (intf == data->intf) { From 9626436bc2d8363cf93219d3794f845b0b9f0cb0 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Thu, 9 May 2024 21:59:21 -0400 
Subject: [PATCH 096/216] crypto: ecc - Prevent ecc_digits_from_bytes from reading too many bytes [ Upstream commit c6ab5c915da460c0397960af3c308386c3f3247b ] Prevent ecc_digits_from_bytes from reading too many bytes from the input byte array in case an insufficient number of bytes is provided to fill the output digit array of ndigits. Therefore, initialize the most significant digits with 0 to avoid trying to read too many bytes later on. Convert the function into a regular function since it is getting too big for an inline function. If too many bytes are provided on the input byte array the extra bytes are ignored since the input variable 'ndigits' limits the number of digits that will be filled. Fixes: d67c96fb97b5 ("crypto: ecdsa - Convert byte arrays with key coordinates to digits") Reviewed-by: Jarkko Sakkinen Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin (cherry picked from commit 55779f26eab9af12474a447001bd17070f055712) --- crypto/ecc.c | 22 ++++++++++++++++++++++ include/crypto/internal/ecc.h | 15 ++------------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index f53fb4d6af992..21504280aca2e 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -66,6 +66,28 @@ const struct ecc_curve *ecc_get_curve(unsigned int curve_id) } EXPORT_SYMBOL(ecc_get_curve); +void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, + u64 *out, unsigned int ndigits) +{ + int diff = ndigits - DIV_ROUND_UP(nbytes, sizeof(u64)); + unsigned int o = nbytes & 7; + __be64 msd = 0; + + /* diff > 0: not enough input bytes: set most significant digits to 0 */ + if (diff > 0) { + ndigits -= diff; + memset(&out[ndigits - 1], 0, diff * sizeof(u64)); + } + + if (o) { + memcpy((u8 *)&msd + sizeof(msd) - o, in, o); + out[--ndigits] = be64_to_cpu(msd); + in += o; + } + ecc_swap_digits(in, out, ndigits); +} +EXPORT_SYMBOL(ecc_digits_from_bytes); + static u64 *ecc_alloc_digits_space(unsigned int ndigits) { size_t len = 
ndigits * sizeof(u64); diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index ab722a8986b76..c0b8be63cbde7 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -63,19 +63,8 @@ static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigit * @out Output digits array * @ndigits: Number of digits to create from byte array */ -static inline void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, - u64 *out, unsigned int ndigits) -{ - unsigned int o = nbytes & 7; - __be64 msd = 0; - - if (o) { - memcpy((u8 *)&msd + sizeof(msd) - o, in, o); - out[--ndigits] = be64_to_cpu(msd); - in += o; - } - ecc_swap_digits(in, out, ndigits); -} +void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, + u64 *out, unsigned int ndigits); /** * ecc_is_key_valid() - Validate a given ECDH private key From 52cfbb1f5f1811f49eadeaef93540ca809b37e62 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 19 Aug 2024 09:41:15 +0200 Subject: [PATCH 097/216] cleanup: Remove address space of returned pointer [ Upstream commit f730fd535fc51573f982fad629f2fc6b4a0cde2f ] Guard functions in local_lock.h are defined using DEFINE_GUARD() and DEFINE_LOCK_GUARD_1() macros having lock type defined as pointer in the percpu address space. The functions, defined by these macros return value in generic address space, causing: cleanup.h:157:18: error: return from pointer to non-enclosed address space and cleanup.h:214:18: error: return from pointer to non-enclosed address space when strict percpu checks are enabled. Add explicit casts to remove address space of the returned pointer. Found by GCC's named address space checks. 
Fixes: e4ab322fbaaa ("cleanup: Add conditional guard support") Signed-off-by: Uros Bizjak Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240819074124.143565-1-ubizjak@gmail.com Signed-off-by: Sasha Levin (cherry picked from commit 8c5ad189e90f5f1470c7c93b22320976dc1dc777) --- include/linux/cleanup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index f0c6d1d45e679..64b8600eb8c0e 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -123,7 +123,7 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return *_T; } + { return (void *)(__force unsigned long)*_T; } #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ @@ -204,7 +204,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ \ static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ { \ - return _T->lock; \ + return (void *)(__force unsigned long)_T->lock; \ } From e42cbb2722c8dc12fc8674beed6b281f7cc18782 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Mon, 22 Jan 2024 14:25:44 +0800 Subject: [PATCH 098/216] scsi: hisi_sas: Fix a deadlock issue related to automatic dump [ Upstream commit 3c4f53b2c341ec6428b98cb51a89a09b025d0953 ] If we issue a disabling PHY command, the device attached with it will go offline, if a 2 bit ECC error occurs at the same time, a hung task may be found: [ 4613.652388] INFO: task kworker/u256:0:165233 blocked for more than 120 seconds. [ 4613.666297] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[ 4613.674809] task:kworker/u256:0 state:D stack: 0 pid:165233 ppid: 2 flags:0x00000208 [ 4613.683959] Workqueue: 0000:74:02.0_disco_q sas_revalidate_domain [libsas] [ 4613.691518] Call trace: [ 4613.694678] __switch_to+0xf8/0x17c [ 4613.698872] __schedule+0x660/0xee0 [ 4613.703063] schedule+0xac/0x240 [ 4613.706994] schedule_timeout+0x500/0x610 [ 4613.711705] __down+0x128/0x36c [ 4613.715548] down+0x240/0x2d0 [ 4613.719221] hisi_sas_internal_abort_timeout+0x1bc/0x260 [hisi_sas_main] [ 4613.726618] sas_execute_internal_abort+0x144/0x310 [libsas] [ 4613.732976] sas_execute_internal_abort_dev+0x44/0x60 [libsas] [ 4613.739504] hisi_sas_internal_task_abort_dev.isra.0+0xbc/0x1b0 [hisi_sas_main] [ 4613.747499] hisi_sas_dev_gone+0x174/0x250 [hisi_sas_main] [ 4613.753682] sas_notify_lldd_dev_gone+0xec/0x2e0 [libsas] [ 4613.759781] sas_unregister_common_dev+0x4c/0x7a0 [libsas] [ 4613.765962] sas_destruct_devices+0xb8/0x120 [libsas] [ 4613.771709] sas_do_revalidate_domain.constprop.0+0x1b8/0x31c [libsas] [ 4613.778930] sas_revalidate_domain+0x60/0xa4 [libsas] [ 4613.784716] process_one_work+0x248/0x950 [ 4613.789424] worker_thread+0x318/0x934 [ 4613.793878] kthread+0x190/0x200 [ 4613.797810] ret_from_fork+0x10/0x18 [ 4613.802121] INFO: task kworker/u256:4:316722 blocked for more than 120 seconds. [ 4613.816026] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[ 4613.824538] task:kworker/u256:4 state:D stack: 0 pid:316722 ppid: 2 flags:0x00000208 [ 4613.833670] Workqueue: 0000:74:02.0 hisi_sas_rst_work_handler [hisi_sas_main] [ 4613.841491] Call trace: [ 4613.844647] __switch_to+0xf8/0x17c [ 4613.848852] __schedule+0x660/0xee0 [ 4613.853052] schedule+0xac/0x240 [ 4613.856984] schedule_timeout+0x500/0x610 [ 4613.861695] __down+0x128/0x36c [ 4613.865542] down+0x240/0x2d0 [ 4613.869216] hisi_sas_controller_prereset+0x58/0x1fc [hisi_sas_main] [ 4613.876324] hisi_sas_rst_work_handler+0x40/0x8c [hisi_sas_main] [ 4613.883019] process_one_work+0x248/0x950 [ 4613.887732] worker_thread+0x318/0x934 [ 4613.892204] kthread+0x190/0x200 [ 4613.896118] ret_from_fork+0x10/0x18 [ 4613.900423] INFO: task kworker/u256:1:348985 blocked for more than 121 seconds. [ 4613.914341] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.922852] task:kworker/u256:1 state:D stack: 0 pid:348985 ppid: 2 flags:0x00000208 [ 4613.931984] Workqueue: 0000:74:02.0_event_q sas_port_event_worker [libsas] [ 4613.939549] Call trace: [ 4613.942702] __switch_to+0xf8/0x17c [ 4613.946892] __schedule+0x660/0xee0 [ 4613.951083] schedule+0xac/0x240 [ 4613.955015] schedule_timeout+0x500/0x610 [ 4613.959725] wait_for_common+0x200/0x610 [ 4613.964349] wait_for_completion+0x3c/0x5c [ 4613.969146] flush_workqueue+0x198/0x790 [ 4613.973776] sas_porte_broadcast_rcvd+0x1e8/0x320 [libsas] [ 4613.979960] sas_port_event_worker+0x54/0xa0 [libsas] [ 4613.985708] process_one_work+0x248/0x950 [ 4613.990420] worker_thread+0x318/0x934 [ 4613.994868] kthread+0x190/0x200 [ 4613.998800] ret_from_fork+0x10/0x18 This is because when the device goes offline, we obtain the hisi_hba semaphore and send the ABORT_DEV command to the device. However, the internal abort timed out due to the 2 bit ECC error and triggers automatic dump. In addition, since the hisi_hba semaphore has been obtained, the dump cannot be executed and the controller cannot be reset. 
Therefore, a deadlock occurs on the following circular dependency: hisi_sas_dev_gone() -> down() -> hisi_sas_internal_task_abort_dev() -> ... -> hisi_sas_internal_abort_timeout() -> down(). The deadlock is triggered only when the timeout occurs while a device is going offline. To fix this issue, use .rst_ha_timeout to distinguish the scenario where a device goes offline from other scenarios. Fixes: 2ff07b5c6fe9 ("scsi: hisi_sas: Directly call register snapshot instead of using workqueue") Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1705904747-62186-2-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (cherry picked from commit a47f0b03149af538af4442ff0702eac430ace1cb) --- drivers/scsi/hisi_sas/hisi_sas_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 5fdba7b39a1b2..4ce737ddb058b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1968,9 +1968,17 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_sas_internal_abort_data *timeout = data; if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { - down(&hisi_hba->sem); + /* + * If timeout occurs in device gone scenario, to avoid + * circular dependency like: + * hisi_sas_dev_gone() -> down() -> ... -> + * hisi_sas_internal_abort_timeout() -> down().
+ */ + if (!timeout->rst_ha_timeout) + down(&hisi_hba->sem); hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); - up(&hisi_hba->sem); + if (!timeout->rst_ha_timeout) + up(&hisi_hba->sem); } if (task->task_state_flags & SAS_TASK_STATE_DONE) { From 94de43c05cf51a3d96db44378e61e3faf754ca83 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 9 Nov 2024 02:04:14 +0200 Subject: [PATCH 099/216] usb: typec: ucsi: glink: fix off-by-one in connector_status [ Upstream commit 4a22918810980897393fa1776ea3877e4baf8cca ] UCSI connector's indices start from 1 up to 3, PMIC_GLINK_MAX_PORTS. Correct the condition in the pmic_glink_ucsi_connector_status() callback, fixing Type-C orientation reporting for the third USB-C connector. Fixes: 76716fd5bf09 ("usb: typec: ucsi: glink: move GPIO reading into connector_status callback") Cc: stable@vger.kernel.org Reported-by: Abel Vesa Reviewed-by: Neil Armstrong Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241109-ucsi-glue-fixes-v2-1-8b21ff4f9fbe@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (cherry picked from commit 8a2273e5c1beb285729aa001422967b4711c53fe) --- drivers/usb/typec/ucsi/ucsi_glink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index f0b4d0a4bb198..82a1081d44f1f 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -202,7 +202,7 @@ static void pmic_glink_ucsi_connector_status(struct ucsi_connector *con) struct pmic_glink_ucsi *ucsi = ucsi_get_drvdata(con->ucsi); int orientation; - if (con->num >= PMIC_GLINK_MAX_PORTS || + if (con->num > PMIC_GLINK_MAX_PORTS || !ucsi->port_orientation[con->num - 1]) return; From 83ecbb6fec736b72f7660a99737e99ce035ee8cc Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Wed, 6 Nov 2024 12:14:59 +0200 Subject: [PATCH 100/216] usb: xhci: Avoid queuing redundant Stop 
Endpoint commands [ Upstream commit 474538b8dd1cd9c666e56cfe8ef60fbb0fb513f4 ] Stop Endpoint command on an already stopped endpoint fails and may be misinterpreted as a known hardware bug by the completion handler. This results in an unnecessary delay with repeated retries of the command. Avoid queuing this command when endpoint state flags indicate that it's stopped or halted and the command will fail. If commands are pending on the endpoint, their completion handlers will process cancelled TDs so it's done. In case of waiting for external operations like clearing TT buffer, the endpoint is stopped and cancelled TDs can be processed now. This eliminates practically all unnecessary retries because an endpoint with pending URBs is maintained in Running state by the driver, unless aforementioned commands or other operations are pending on it. This is guaranteed by xhci_ring_ep_doorbell() and by the fact that it is called every time any of those operations completes. The only known exceptions are hardware bugs (the endpoint never starts at all) and Stream Protocol errors not associated with any TRB, which cause an endpoint reset not followed by restart. Sounds like a bug. Generally, these retries are only expected to happen when the endpoint fails to start for unknown/no reason, which is a worse problem itself, and fixing the bug eliminates the retries too. All cases were tested and found to work as expected. SET_DEQ_PENDING was produced by patching uvcvideo to unlink URBs in 100us intervals, which then runs into this case very often. EP_HALTED was produced by restarting 'cat /dev/ttyUSB0' on a serial dongle with broken cable. EP_CLEARING_TT by the same, with the dongle on an external hub. 
Fixes: fd9d55d190c0 ("xhci: retry Stop Endpoint on buggy NEC controllers") CC: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20241106101459.775897-34-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (cherry picked from commit 8b2e38f2a9b71af39f5697c30113136d4a5cae1a) --- drivers/usb/host/xhci-ring.c | 13 +++++++++++++ drivers/usb/host/xhci.c | 19 +++++++++++++++---- drivers/usb/host/xhci.h | 1 + 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ee07a1cc4ba46..71c0da505be01 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1092,6 +1092,19 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) return 0; } +/* + * Erase queued TDs from transfer ring(s) and give back those the xHC didn't + * stop on. If necessary, queue commands to move the xHC off cancelled TDs it + * stopped on. Those will be given back later when the commands complete. + * + * Call under xhci->lock on a stopped endpoint. + */ +void xhci_process_cancelled_tds(struct xhci_virt_ep *ep) +{ + xhci_invalidate_cancelled_tds(ep); + xhci_giveback_invalidated_tds(ep); +} + /* * Returns the TD the endpoint ring halted on. * Only call for non-running rings without streams. diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0e91e7a9e1f00..691d03313d027 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1738,10 +1738,21 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) } } - /* Queue a stop endpoint command, but only if this is - * the first cancellation to be handled. 
- */ - if (!(ep->ep_state & EP_STOP_CMD_PENDING)) { + /* These completion handlers will sort out cancelled TDs for us */ + if (ep->ep_state & (EP_STOP_CMD_PENDING | EP_HALTED | SET_DEQ_PENDING)) { + xhci_dbg(xhci, "Not queuing Stop Endpoint on slot %d ep %d in state 0x%x\n", + urb->dev->slot_id, ep_index, ep->ep_state); + goto done; + } + + /* In this case no commands are pending but the endpoint is stopped */ + if (ep->ep_state & EP_CLEARING_TT) { + /* and cancelled TDs can be given back right away */ + xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n", + urb->dev->slot_id, ep_index, ep->ep_state); + xhci_process_cancelled_tds(ep); + } else { + /* Otherwise, queue a new Stop Endpoint command */ command = xhci_alloc_command(xhci, false, GFP_ATOMIC); if (!command) { ret = -ENOMEM; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 4b342a6eace27..de89837b2a64c 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1953,6 +1953,7 @@ void xhci_ring_doorbell_for_active_rings(struct xhci_hcd *xhci, void xhci_cleanup_command_queue(struct xhci_hcd *xhci); void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring); unsigned int count_trbs(u64 addr, u64 len); +void xhci_process_cancelled_tds(struct xhci_virt_ep *ep); /* xHCI roothub code */ void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port, From 17f5a1290b23c51b9c2c0e728542b9d211ca3ba7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 30 Nov 2024 10:00:08 +0100 Subject: [PATCH 101/216] ALSA: ump: Shut up truncated string warning [ Upstream commit ed990c07af70d286f5736021c6e25d8df6f2f7b0 ] The recent change for the legacy substream name update brought a compile warning for some compilers due to the nature of snprintf(). Use scnprintf() to shut up the warning since the truncation is intentional. 
Fixes: e29e504e7890 ("ALSA: ump: Indicate the inactive group in legacy substream names") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411300103.FrGuTAYp-lkp@intel.com/ Link: https://patch.msgid.link/20241130090009.19849-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit d4eb5b3c115d5a95bd0b8d4a584a93941b882b4d) --- sound/core/ump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index 4aec90dac07ee..32d27e58416aa 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1251,9 +1251,9 @@ static void fill_substream_names(struct snd_ump_endpoint *ump, name = ump->groups[idx].name; if (!*name) name = ump->info.name; - snprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s", - idx + 1, name, - ump->groups[idx].active ? "" : " [Inactive]"); + scnprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s", + idx + 1, name, + ump->groups[idx].active ? "" : " [Inactive]"); } } From aa10dbe19a30f3b223f7713d48f068ab2d41d545 Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Mon, 16 Dec 2024 11:25:38 +0900 Subject: [PATCH 102/216] platform/x86: mlx-platform: call pci_dev_put() to balance the refcount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 185e1b1d91e419445d3fd99c1c0376a970438acf ] mlxplat_pci_fpga_device_init() calls pci_get_device() but does not release the refcount on error path. Call pci_dev_put() on the error path and in mlxplat_pci_fpga_device_exit() to fix this. This bug was found by an experimental static analysis tool that I am developing. 
Fixes: 02daa222fbdd ("platform: mellanox: Add initial support for PCIe based programming logic device") Signed-off-by: Joe Hattori Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/20241216022538.381209-1-joe@pf.is.s.u-tokyo.ac.jp Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin (cherry picked from commit 23ea763880d6967615c23f690edd502ac794f38b) --- drivers/platform/x86/mlx-platform.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index a2ffe4157df10..b8d77adc9ea14 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -6237,6 +6237,7 @@ mlxplat_pci_fpga_device_init(unsigned int device, const char *res_name, struct p fail_pci_request_regions: pci_disable_device(pci_dev); fail_pci_enable_device: + pci_dev_put(pci_dev); return err; } @@ -6247,6 +6248,7 @@ mlxplat_pci_fpga_device_exit(struct pci_dev *pci_bridge, iounmap(pci_bridge_addr); pci_release_regions(pci_bridge); pci_disable_device(pci_bridge); + pci_dev_put(pci_bridge); } static int From bc83b896e8ee60b8abbfbc829be34e507d0c842c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Jun 2024 15:17:11 +0800 Subject: [PATCH 103/216] f2fs: fix to wait dio completion commit 96cfeb0389530ae32ade8a48ae3ae1ac3b6c009d upstream. It should wait for all existing dio write IOs before block removal; otherwise, a previous direct write IO may overwrite data in a block which may be reused by another inode.
Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [ Resolve line conflicts to make it work on 6.6.y ] Signed-off-by: Alva Lan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c2a7fc514637f640ff55c3f3e3ed879970814a3f) --- fs/f2fs/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 196755a34833d..ae129044c52f4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1037,6 +1037,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } + /* + * wait for inflight dio, blocks should be removed after + * IO completion. + */ + if (attr->ia_size < old_size) + inode_dio_wait(inode); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -1873,6 +1880,12 @@ static long f2fs_fallocate(struct file *file, int mode, if (ret) goto out; + /* + * wait for inflight dio, blocks should be removed after IO + * completion. + */ + inode_dio_wait(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; From 9638a5cbc4ac67ed107b78eae7fa1cf61cb71dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thi=C3=A9baud=20Weksteen?= Date: Thu, 5 Dec 2024 12:09:19 +1100 Subject: [PATCH 104/216] selinux: ignore unknown extended permissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 900f83cf376bdaf798b6f5dcb2eae0c822e908b6 upstream. When evaluating extended permissions, ignore unknown permissions instead of calling BUG(). This commit ensures that future permissions can be added without interfering with older kernels. 
Cc: stable@vger.kernel.org Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls") Signed-off-by: Thiébaud Weksteen Signed-off-by: Paul Moore Acked-by: Paul Moore Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c1dbd28a079553de0023e1c938c713efeeee400f) --- security/selinux/ss/services.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 361cea20af730..03427046c8242 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -956,7 +956,10 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, xpermd->driver)) return; } else { - BUG(); + pr_warn_once( + "SELinux: unknown extended permission (%u) will be ignored\n", + node->datum.u.xperms->specified); + return; } if (node->key.specified == AVTAB_XPERMS_ALLOWED) { @@ -993,7 +996,8 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, node->datum.u.xperms->perms.p[i]; } } else { - BUG(); + pr_warn_once("SELinux: unknown specified key (%u)\n", + node->key.specified); } } From 4237a8c6c9c84ce1ca60e0eb7feef0a3af6b4d1a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 13 Nov 2024 18:16:48 +0100 Subject: [PATCH 105/216] btrfs: fix use-after-free in btrfs_encoded_read_endio() commit 05b36b04d74a517d6675bf2f90829ff1ac7e28dc upstream. Shinichiro reported the following use-after free that sometimes is happening in our CI system when running fstests' btrfs/284 on a TCMU runner device: BUG: KASAN: slab-use-after-free in lock_release+0x708/0x780 Read of size 8 at addr ffff888106a83f18 by task kworker/u80:6/219 CPU: 8 UID: 0 PID: 219 Comm: kworker/u80:6 Not tainted 6.12.0-rc6-kts+ #15 Hardware name: Supermicro Super Server/X11SPi-TF, BIOS 3.3 02/21/2020 Workqueue: btrfs-endio btrfs_end_bio_work [btrfs] Call Trace: dump_stack_lvl+0x6e/0xa0 ? lock_release+0x708/0x780 print_report+0x174/0x505 ? lock_release+0x708/0x780 ? 
__virt_addr_valid+0x224/0x410 ? lock_release+0x708/0x780 kasan_report+0xda/0x1b0 ? lock_release+0x708/0x780 ? __wake_up+0x44/0x60 lock_release+0x708/0x780 ? __pfx_lock_release+0x10/0x10 ? __pfx_do_raw_spin_lock+0x10/0x10 ? lock_is_held_type+0x9a/0x110 _raw_spin_unlock_irqrestore+0x1f/0x60 __wake_up+0x44/0x60 btrfs_encoded_read_endio+0x14b/0x190 [btrfs] btrfs_check_read_bio+0x8d9/0x1360 [btrfs] ? lock_release+0x1b0/0x780 ? trace_lock_acquire+0x12f/0x1a0 ? __pfx_btrfs_check_read_bio+0x10/0x10 [btrfs] ? process_one_work+0x7e3/0x1460 ? lock_acquire+0x31/0xc0 ? process_one_work+0x7e3/0x1460 process_one_work+0x85c/0x1460 ? __pfx_process_one_work+0x10/0x10 ? assign_work+0x16c/0x240 worker_thread+0x5e6/0xfc0 ? __pfx_worker_thread+0x10/0x10 kthread+0x2c3/0x3a0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Allocated by task 3661: kasan_save_stack+0x30/0x50 kasan_save_track+0x14/0x30 __kasan_kmalloc+0xaa/0xb0 btrfs_encoded_read_regular_fill_pages+0x16c/0x6d0 [btrfs] send_extent_data+0xf0f/0x24a0 [btrfs] process_extent+0x48a/0x1830 [btrfs] changed_cb+0x178b/0x2ea0 [btrfs] btrfs_ioctl_send+0x3bf9/0x5c20 [btrfs] _btrfs_ioctl_send+0x117/0x330 [btrfs] btrfs_ioctl+0x184a/0x60a0 [btrfs] __x64_sys_ioctl+0x12e/0x1a0 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e Freed by task 3661: kasan_save_stack+0x30/0x50 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x70 __kasan_slab_free+0x4f/0x70 kfree+0x143/0x490 btrfs_encoded_read_regular_fill_pages+0x531/0x6d0 [btrfs] send_extent_data+0xf0f/0x24a0 [btrfs] process_extent+0x48a/0x1830 [btrfs] changed_cb+0x178b/0x2ea0 [btrfs] btrfs_ioctl_send+0x3bf9/0x5c20 [btrfs] _btrfs_ioctl_send+0x117/0x330 [btrfs] btrfs_ioctl+0x184a/0x60a0 [btrfs] __x64_sys_ioctl+0x12e/0x1a0 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e The buggy address belongs to the object at ffff888106a83f00 which belongs to the cache kmalloc-rnd-07-96 of size 96 The buggy address is 
located 24 bytes inside of freed 96-byte region [ffff888106a83f00, ffff888106a83f60) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888106a83800 pfn:0x106a83 flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff) page_type: f5(slab) raw: 0017ffffc0000000 ffff888100053680 ffffea0004917200 0000000000000004 raw: ffff888106a83800 0000000080200019 00000001f5000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888106a83e00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888106a83e80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc >ffff888106a83f00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ^ ffff888106a83f80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888106a84000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Further analyzing the trace and the crash dump's vmcore file shows that the wake_up() call in btrfs_encoded_read_endio() is calling wake_up() on the wait_queue that is in the private data passed to the end_io handler. Commit 4ff47df40447 ("btrfs: move priv off stack in btrfs_encoded_read_regular_fill_pages()") moved 'struct btrfs_encoded_read_private' off the stack. 
Before that commit one can see a corruption of the private data when analyzing the vmcore after a crash: *(struct btrfs_encoded_read_private *)0xffff88815626eec8 = { .wait = (wait_queue_head_t){ .lock = (spinlock_t){ .rlock = (struct raw_spinlock){ .raw_lock = (arch_spinlock_t){ .val = (atomic_t){ .counter = (int)-2005885696, }, .locked = (u8)0, .pending = (u8)157, .locked_pending = (u16)40192, .tail = (u16)34928, }, .magic = (unsigned int)536325682, .owner_cpu = (unsigned int)29, .owner = (void *)__SCT__tp_func_btrfs_transaction_commit+0x0 = 0x0, .dep_map = (struct lockdep_map){ .key = (struct lock_class_key *)0xffff8881575a3b6c, .class_cache = (struct lock_class *[2]){ 0xffff8882a71985c0, 0xffffea00066f5d40 }, .name = (const char *)0xffff88815626f100 = "", .wait_type_outer = (u8)37, .wait_type_inner = (u8)178, .lock_type = (u8)154, }, }, .__padding = (u8 [24]){ 0, 157, 112, 136, 50, 174, 247, 31, 29 }, .dep_map = (struct lockdep_map){ .key = (struct lock_class_key *)0xffff8881575a3b6c, .class_cache = (struct lock_class *[2]){ 0xffff8882a71985c0, 0xffffea00066f5d40 }, .name = (const char *)0xffff88815626f100 = "", .wait_type_outer = (u8)37, .wait_type_inner = (u8)178, .lock_type = (u8)154, }, }, .head = (struct list_head){ .next = (struct list_head *)0x112cca, .prev = (struct list_head *)0x47, }, }, .pending = (atomic_t){ .counter = (int)-1491499288, }, .status = (blk_status_t)130, } Here we can see several indicators of in-memory data corruption, e.g. the large negative atomic values of ->pending or ->wait->lock->rlock->raw_lock->val, as well as the bogus spinlock magic 0x1ff7ae32 (decimal 536325682 above) instead of 0xdead4ead or the bogus pointer values for ->wait->head. To fix this, change atomic_dec_return() to atomic_dec_and_test() to fix the corruption, as atomic_dec_return() is defined as two instructions on x86_64, whereas atomic_dec_and_test() is defined as a single atomic operation. 
This can lead to a situation where counter value is already decremented but the if statement in btrfs_encoded_read_endio() is not completely processed, i.e. the 0 test has not completed. If another thread continues executing btrfs_encoded_read_regular_fill_pages() the atomic_dec_return() there can see an already updated ->pending counter and continues by freeing the private data. Continuing in the endio handler the test for 0 succeeds and the wait_queue is woken up, resulting in a use-after-free. Reported-by: Shinichiro Kawasaki Suggested-by: Damien Le Moal Fixes: 1881fba89bd5 ("btrfs: add BTRFS_IOC_ENCODED_READ ioctl") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Reviewed-by: Qu Wenruo Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Alva Lan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6228f13f1996a4feb9b601d6644bf0bfe03671dd) --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6767f728c079..eb9319d856f2d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9972,7 +9972,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) */ WRITE_ONCE(priv->status, bbio->bio.bi_status); } - if (!atomic_dec_return(&priv->pending)) + if (atomic_dec_and_test(&priv->pending)) wake_up(&priv->wait); bio_put(&bbio->bio); } From 016378e5ff978dd303c6efe92c6d5ced22103378 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 12 Dec 2024 20:19:48 -0800 Subject: [PATCH 106/216] mmc: sdhci-msm: fix crypto key eviction commit 8d90a86ed053226a297ce062f4d9f4f521e05c4c upstream. Commit c7eed31e235c ("mmc: sdhci-msm: Switch to the new ICE API") introduced an incorrect check of the algorithm ID into the key eviction path, and thus qcom_ice_evict_key() is no longer ever called. Fix it. 
Fixes: c7eed31e235c ("mmc: sdhci-msm: Switch to the new ICE API") Cc: stable@vger.kernel.org Cc: Abel Vesa Signed-off-by: Eric Biggers Message-ID: <20241213041958.202565-6-ebiggers@kernel.org> Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman (cherry picked from commit de2a10e192264840c56d0b84e505ca671bdbe22d) --- drivers/mmc/host/sdhci-msm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index e113b99a3eab5..8716004fcf6c9 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -1867,20 +1867,20 @@ static int sdhci_msm_program_key(struct cqhci_host *cq_host, struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); union cqhci_crypto_cap_entry cap; + if (!(cfg->config_enable & CQHCI_CRYPTO_CONFIGURATION_ENABLE)) + return qcom_ice_evict_key(msm_host->ice, slot); + /* Only AES-256-XTS has been tested so far. */ cap = cq_host->crypto_cap_array[cfg->crypto_cap_idx]; if (cap.algorithm_id != CQHCI_CRYPTO_ALG_AES_XTS || cap.key_size != CQHCI_CRYPTO_KEY_SIZE_256) return -EINVAL; - if (cfg->config_enable & CQHCI_CRYPTO_CONFIGURATION_ENABLE) - return qcom_ice_program_key(msm_host->ice, - QCOM_ICE_CRYPTO_ALG_AES_XTS, - QCOM_ICE_CRYPTO_KEY_SIZE_256, - cfg->crypto_key, - cfg->data_unit_size, slot); - else - return qcom_ice_evict_key(msm_host->ice, slot); + return qcom_ice_program_key(msm_host->ice, + QCOM_ICE_CRYPTO_ALG_AES_XTS, + QCOM_ICE_CRYPTO_KEY_SIZE_256, + cfg->crypto_key, + cfg->data_unit_size, slot); } #else /* CONFIG_MMC_CRYPTO */ From 15a66972b4e5ec9186006b8c27e5d470704352cb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 31 Dec 2024 00:06:46 -0500 Subject: [PATCH 107/216] tracing: Have process_string() also allow arrays commit afc6717628f959941d7b33728570568b4af1c4b8 upstream. 
In order to catch a common bug where a TRACE_EVENT() TP_fast_assign() assigns an address of an allocated string to the ring buffer and then references it in TP_printk(), which can be executed hours later after the string has been freed, the function test_event_printk() runs on all events as they are registered to make sure there's no unwanted dereferencing. It calls process_string() to handle cases where the TP_printk() format has "%s". It returns whether or not the string is safe. But it can have some false positives. For instance, xe_bo_move() has: TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) Where the "%s" references into xe_mem_type_to_name[]. This is an array of pointers that should be safe for the event to access. Instead of flagging this as a bad reference, if a reference points to an array, where the record field is the index, consider it safe. Link: https://lore.kernel.org/all/9dee19b6185d325d0e6fa5f7cbba81d007d99166.camel@sapience.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241231000646.324fb5f7@gandalf.local.home Fixes: 65a25d9f7ac02 ("tracing: Add "%s" check in test_event_printk()") Reported-by: Genes Lists Tested-by: Gene C Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a64e5295ebc4afdefe69cdf16cc286a60ff8ba4b) --- kernel/trace/trace_events.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64cd856308e77..2ee59b217d7d2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -360,6 +360,18 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca s = r + 1; } while (s < e); + /* + * Check for arrays.
If the argument has: foo[REC->val] + * then it is very likely that foo is an array of strings + * that are safe to use. + */ + r = strstr(s, "["); + if (r && r < e) { + r = strstr(r, "REC->"); + if (r && r < e) + return true; + } + /* * If there's any strings in the argument consider this arg OK as it * could be: REC->field ? "foo" : "bar" and we don't want to get into From 9f94c5f91d60edbd0420240531077ff40628a7b3 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 18 Nov 2024 23:28:28 +0100 Subject: [PATCH 108/216] ceph: give up on paths longer than PATH_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 550f7ca98ee028a606aa75705a7e77b1bd11720f upstream. If the full path to be built by ceph_mdsc_build_path() happens to be longer than PATH_MAX, then this function will enter an endless (retry) loop, effectively blocking the whole task. Most of the machine becomes unusable, making this a very simple and effective DoS vulnerability. I cannot imagine why this retry was ever implemented, but it seems rather useless and harmful to me. Let's remove it and fail with ENAMETOOLONG instead. Cc: stable@vger.kernel.org Reported-by: Dario Weißer Signed-off-by: Max Kellermann Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov [idryomov@gmail.com: backport to 6.6: pr_warn() is still in use] Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c47ed91156daf328601d02b58d52d9804da54108) --- fs/ceph/mds_client.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 11289ce8a8cc8..dfa1b3c82b53a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2713,12 +2713,11 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, if (pos < 0) { /* - * A rename didn't occur, but somehow we didn't end up where - * we thought we would. Throw a warning and try again. 
+ * The path is longer than PATH_MAX and this function + * cannot ever succeed. Creating paths that long is + * possible with Ceph, but Linux cannot use them. */ - pr_warn("build_path did not end path lookup where expected (pos = %d)\n", - pos); - goto retry; + return ERR_PTR(-ENAMETOOLONG); } *pbase = base; From 47514898f32fc6b98faa198c3583114fa6158d68 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 18 Dec 2024 11:53:01 +0800 Subject: [PATCH 109/216] net: mctp: handle skb cleanup on sock_queue failures [ Upstream commit ce1219c3f76bb131d095e90521506d3c6ccfa086 ] Currently, we don't use the return value from sock_queue_rcv_skb, which means we may leak skbs if a message is not successfully queued to a socket. Instead, ensure that we're freeing the skb where the sock hasn't otherwise taken ownership of the skb by adding checks on the sock_queue_rcv_skb() to invoke a kfree on failure. In doing so, rather than using the 'rc' value to trigger the kfree_skb(), use the skb pointer itself, which is more explicit. Also, add a kunit test for the sock delivery failure cases. Fixes: 4a992bbd3650 ("mctp: Implement message fragmentation & reassembly") Cc: stable@vger.kernel.org Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20241218-mctp-next-v2-1-1c1729645eaa@codeconstruct.com.au Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit 1ec141d8f51b21a22e05ecd3bd1bf52e10fe00a1) --- net/mctp/route.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index c6a815df9d358..d3c1f54386efc 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -334,8 +334,13 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) msk = NULL; rc = -EINVAL; - /* we may be receiving a locally-routed packet; drop source sk - * accounting + /* We may be receiving a locally-routed packet; drop source sk + * accounting. 
+ * + * From here, we will either queue the skb - either to a frag_queue, or + * to a receiving socket. When that succeeds, we clear the skb pointer; + * a non-NULL skb on exit will be otherwise unowned, and hence + * kfree_skb()-ed. */ skb_orphan(skb); @@ -389,7 +394,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * pending key. */ if (flags & MCTP_HDR_FLAG_EOM) { - sock_queue_rcv_skb(&msk->sk, skb); + rc = sock_queue_rcv_skb(&msk->sk, skb); + if (!rc) + skb = NULL; if (key) { /* we've hit a pending reassembly; not much we * can do but drop it @@ -398,7 +405,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) MCTP_TRACE_KEY_REPLIED); key = NULL; } - rc = 0; goto out_unlock; } @@ -425,8 +431,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * this function. */ rc = mctp_key_add(key, msk); - if (!rc) + if (!rc) { trace_mctp_key_acquire(key); + skb = NULL; + } /* we don't need to release key->lock on exit, so * clean up here and suppress the unlock via @@ -444,6 +452,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) key = NULL; } else { rc = mctp_frag_queue(key, skb); + if (!rc) + skb = NULL; } } @@ -458,12 +468,19 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) else rc = mctp_frag_queue(key, skb); + if (rc) + goto out_unlock; + + /* we've queued; the queue owns the skb now */ + skb = NULL; + /* end of message? 
deliver to socket, and we're done with * the reassembly/response key */ - if (!rc && flags & MCTP_HDR_FLAG_EOM) { - sock_queue_rcv_skb(key->sk, key->reasm_head); - key->reasm_head = NULL; + if (flags & MCTP_HDR_FLAG_EOM) { + rc = sock_queue_rcv_skb(key->sk, key->reasm_head); + if (!rc) + key->reasm_head = NULL; __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED); key = NULL; } @@ -482,8 +499,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) if (any_key) mctp_key_unref(any_key); out: - if (rc) - kfree_skb(skb); + kfree_skb(skb); return rc; } From ca7943a34550b281108c47a925f60e15793c5996 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 20 Oct 2023 04:35:45 +0100 Subject: [PATCH 110/216] tracing: Move readpos from seq_buf to trace_seq [ Upstream commit d0ed46b60396cfa7e0056f55e1ce0b43c7db57b6 ] To make seq_buf more lightweight as a string buf, move the readpos member from seq_buf to its container, trace_seq. That puts the responsibility of maintaining the readpos entirely in the tracing code. If some future users want to package up the readpos with a seq_buf, we can define a new struct then. 
Link: https://lore.kernel.org/linux-trace-kernel/20231020033545.2587554-2-willy@infradead.org Cc: Kees Cook Cc: Justin Stitt Cc: Kent Overstreet Cc: Petr Mladek Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Sergey Senozhatsky Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Greg Kroah-Hartman Signed-off-by: Steven Rostedt (Google) Stable-dep-of: afd2627f727b ("tracing: Check "%s" dereference via the field and not the TP_printk format") Signed-off-by: Sasha Levin (cherry picked from commit c46547b4686e9099dbad1f6f8e29f65a0b2c461c) --- include/linux/seq_buf.h | 5 +---- include/linux/trace_seq.h | 2 ++ kernel/trace/trace.c | 10 +++++----- kernel/trace/trace_seq.c | 6 +++++- lib/seq_buf.c | 22 ++++++++++------------ 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 515d7fcb9634b..a0fb013cebdf2 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -14,19 +14,16 @@ * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer - * @readpos: The next position to read in the buffer. 
*/ struct seq_buf { char *buffer; size_t size; size_t len; - loff_t readpos; }; static inline void seq_buf_clear(struct seq_buf *s) { s->len = 0; - s->readpos = 0; } static inline void @@ -143,7 +140,7 @@ extern __printf(2, 0) int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args); extern int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s); extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, - int cnt); + size_t start, int cnt); extern int seq_buf_puts(struct seq_buf *s, const char *str); extern int seq_buf_putc(struct seq_buf *s, unsigned char c); extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6be92bf559fe7..3691e0e76a1a2 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -14,6 +14,7 @@ struct trace_seq { char buffer[PAGE_SIZE]; struct seq_buf seq; + size_t readpos; int full; }; @@ -22,6 +23,7 @@ trace_seq_init(struct trace_seq *s) { seq_buf_init(&s->seq, s->buffer, PAGE_SIZE); s->full = 0; + s->readpos = 0; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 220903117c513..83f6ef4d74197 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1731,15 +1731,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; - if (trace_seq_used(s) <= s->seq.readpos) + if (trace_seq_used(s) <= s->readpos) return -EBUSY; - len = trace_seq_used(s) - s->seq.readpos; + len = trace_seq_used(s) - s->readpos; if (cnt > len) cnt = len; - memcpy(buf, s->buffer + s->seq.readpos, cnt); + memcpy(buf, s->buffer + s->readpos, cnt); - s->seq.readpos += cnt; + s->readpos += cnt; return cnt; } @@ -7011,7 +7011,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); - if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq)) + if (iter->seq.readpos >= 
trace_seq_used(&iter->seq)) trace_seq_init(&iter->seq); /* diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index bac06ee3b98b8..7be97229ddf86 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -370,8 +370,12 @@ EXPORT_SYMBOL_GPL(trace_seq_path); */ int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt) { + int ret; __trace_seq_init(s); - return seq_buf_to_user(&s->seq, ubuf, cnt); + ret = seq_buf_to_user(&s->seq, ubuf, s->readpos, cnt); + if (ret > 0) + s->readpos += ret; + return ret; } EXPORT_SYMBOL_GPL(trace_seq_to_user); diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 45c450f423fa8..b7477aefff537 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -324,23 +324,24 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) * seq_buf_to_user - copy the sequence buffer to user space * @s: seq_buf descriptor * @ubuf: The userspace memory location to copy to + * @start: The first byte in the buffer to copy * @cnt: The amount to copy * * Copies the sequence buffer into the userspace memory pointed to - * by @ubuf. It starts from the last read position (@s->readpos) - * and writes up to @cnt characters or till it reaches the end of - * the content in the buffer (@s->len), which ever comes first. + * by @ubuf. It starts from @start and writes up to @cnt characters + * or until it reaches the end of the content in the buffer (@s->len), + * whichever comes first. * * On success, it returns a positive number of the number of bytes * it copied. * * On failure it returns -EBUSY if all of the content in the * sequence has been already read, which includes nothing in the - * sequence (@s->len == @s->readpos). + * sequence (@s->len == @start). * * Returns -EFAULT if the copy to userspace fails. 
*/ -int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, int cnt) +int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt) { int len; int ret; @@ -350,20 +351,17 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, int cnt) len = seq_buf_used(s); - if (len <= s->readpos) + if (len <= start) return -EBUSY; - len -= s->readpos; + len -= start; if (cnt > len) cnt = len; - ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); + ret = copy_to_user(ubuf, s->buffer + start, cnt); if (ret == cnt) return -EFAULT; - cnt -= ret; - - s->readpos += cnt; - return cnt; + return cnt - ret; } /** From c750196e79f50996ae1b715ab5c92435c80cdd63 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 24 Oct 2023 15:55:59 +0100 Subject: [PATCH 111/216] powerpc: Remove initialisation of readpos [ Upstream commit 0f7f544af60a6082cfaa3ed4c8f4ca1a858807ee ] While powerpc doesn't use the seq_buf readpos, it did explicitly initialise it for no good reason. Link: https://lore.kernel.org/linux-trace-kernel/20231024145600.739451-1-willy@infradead.org Cc: Christoph Hellwig Cc: Justin Stitt Cc: Kent Overstreet Cc: Petr Mladek Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Sergey Senozhatsky Cc: Michael Ellerman Reviewed-by: Kees Cook Fixes: d0ed46b60396 ("tracing: Move readpos from seq_buf to trace_seq") Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin (cherry picked from commit cd27bbe8981044b2720965756a3919c450f72978) --- arch/powerpc/kernel/setup-common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d43db8150767b..dddf4f31c219a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -601,7 +601,6 @@ struct seq_buf ppc_hw_desc __initdata = { .buffer = ppc_hw_desc_buf, .size = sizeof(ppc_hw_desc_buf), .len = 0, - .readpos = 0, }; static __init void probe_machine(void) From 
1b9bdb20804b6ab201bca6faef3693f788292642 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Oct 2023 08:56:38 -0700 Subject: [PATCH 112/216] seq_buf: Introduce DECLARE_SEQ_BUF and seq_buf_str() [ Upstream commit dcc4e5728eeaeda84878ca0018758cff1abfca21 ] Solve two ergonomic issues with struct seq_buf; 1) Too much boilerplate is required to initialize: struct seq_buf s; char buf[32]; seq_buf_init(s, buf, sizeof(buf)); Instead, we can build this directly on the stack. Provide DECLARE_SEQ_BUF() macro to do this: DECLARE_SEQ_BUF(s, 32); 2) %NUL termination is fragile and requires 2 steps to get a valid C String (and is a layering violation exposing the "internals" of seq_buf): seq_buf_terminate(s); do_something(s->buffer); Instead, we can just return s->buffer directly after terminating it in the refactored seq_buf_terminate(), now known as seq_buf_str(): do_something(seq_buf_str(s)); Link: https://lore.kernel.org/linux-trace-kernel/20231027155634.make.260-kees@kernel.org Link: https://lore.kernel.org/linux-trace-kernel/20231026194033.it.702-kees@kernel.org/ Cc: Yosry Ahmed Cc: "Matthew Wilcox (Oracle)" Cc: Christoph Hellwig Cc: Justin Stitt Cc: Kent Overstreet Cc: Petr Mladek Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Sergey Senozhatsky Cc: Masami Hiramatsu Cc: Greg Kroah-Hartman Cc: Arnd Bergmann Cc: Jonathan Corbet Cc: Yun Zhou Cc: Jacob Keller Cc: Zhen Lei Signed-off-by: Kees Cook Signed-off-by: Steven Rostedt (Google) Stable-dep-of: afd2627f727b ("tracing: Check "%s" dereference via the field and not the TP_printk format") Signed-off-by: Sasha Levin (cherry picked from commit 6920e362bc080d045dd1eca431c7819f22014a81) --- include/linux/seq_buf.h | 21 +++++++++++++++++---- kernel/trace/trace.c | 11 +---------- lib/seq_buf.c | 4 +--- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index a0fb013cebdf2..d9db59f420a49 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -21,9 +21,18 
@@ struct seq_buf { size_t len; }; +#define DECLARE_SEQ_BUF(NAME, SIZE) \ + char __ ## NAME ## _buffer[SIZE] = ""; \ + struct seq_buf NAME = { \ + .buffer = &__ ## NAME ## _buffer, \ + .size = SIZE, \ + } + static inline void seq_buf_clear(struct seq_buf *s) { s->len = 0; + if (s->size) + s->buffer[0] = '\0'; } static inline void @@ -69,8 +78,8 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_terminate - Make sure buffer is nul terminated - * @s: the seq_buf descriptor to terminate. + * seq_buf_str - get %NUL-terminated C string from seq_buf + * @s: the seq_buf handle * * This makes sure that the buffer in @s is nul terminated and * safe to read as a string. @@ -81,16 +90,20 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * * After this function is called, s->buffer is safe to use * in string operations. + * + * Returns @s->buf after making sure it is terminated. */ -static inline void seq_buf_terminate(struct seq_buf *s) +static inline const char *seq_buf_str(struct seq_buf *s) { if (WARN_ON(s->size == 0)) - return; + return ""; if (seq_buf_buffer_left(s)) s->buffer[s->len] = 0; else s->buffer[s->size - 1] = 0; + + return s->buffer; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 83f6ef4d74197..d9406a1f87950 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3810,15 +3810,6 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str, return false; } -static const char *show_buffer(struct trace_seq *s) -{ - struct seq_buf *seq = &s->seq; - - seq_buf_terminate(seq); - - return seq->buffer; -} - static DEFINE_STATIC_KEY_FALSE(trace_no_verify); static int test_can_verify_check(const char *fmt, ...) 
@@ -3958,7 +3949,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, */ if (WARN_ONCE(!trace_safe_str(iter, str, star, len), "fmt: '%s' current_buffer: '%s'", - fmt, show_buffer(&iter->seq))) { + fmt, seq_buf_str(&iter->seq.seq))) { int ret; /* Try to safely read the string */ diff --git a/lib/seq_buf.c b/lib/seq_buf.c index b7477aefff537..23518f77ea9c5 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -109,9 +109,7 @@ void seq_buf_do_printk(struct seq_buf *s, const char *lvl) if (s->size == 0 || s->len == 0) return; - seq_buf_terminate(s); - - start = s->buffer; + start = seq_buf_str(s); while ((lf = strchr(start, '\n'))) { int len = lf - start + 1; From ed6a820de532892921dc2833cea5a04471279d82 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 12 Jun 2024 19:19:45 -0400 Subject: [PATCH 113/216] tracing: Handle old buffer mappings for event strings and functions [ Upstream commit 07714b4bb3f9800261c8b4b2f47e9010ed60979d ] Use the saved text_delta and data_delta of a persistent memory mapped ring buffer that was saved from a previous boot, and use the delta in the trace event print output so that strings and functions show up normally. That is, for an event like trace_kmalloc() that prints the callsite via "%pS", if it used the address saved in the ring buffer it will not match the function that was saved in the previous boot if the kernel remaps itself between boots. For RCU events that point to saved static strings where only the address of the string is saved in the ring buffer, it too will be adjusted to point to where the string is on the current boot. 
Link: https://lkml.kernel.org/r/20240612232026.821020753@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Vincent Donnefort Cc: Joel Fernandes Cc: Daniel Bristot de Oliveira Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineeth Pillai Cc: Youssef Esmat Cc: Beau Belgrave Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Petkov Cc: "Paul E. McKenney" Cc: David Howells Cc: Mike Rapoport Cc: Dave Hansen Cc: Tony Luck Cc: Guenter Roeck Cc: Ross Zwisler Cc: Kees Cook Signed-off-by: Steven Rostedt (Google) Stable-dep-of: afd2627f727b ("tracing: Check "%s" dereference via the field and not the TP_printk format") Signed-off-by: Sasha Levin (cherry picked from commit 680c07fabc2bc211f3b60c1d84f500a0bf2ec46c) --- kernel/trace/trace.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d9406a1f87950..2a45efc4e4171 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3858,8 +3858,11 @@ static void test_can_verify(void) void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, va_list ap) { + long text_delta = iter->tr->text_delta; + long data_delta = iter->tr->data_delta; const char *p = fmt; const char *str; + bool good; int i, j; if (WARN_ON_ONCE(!fmt)) @@ -3878,7 +3881,10 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, j = 0; - /* We only care about %s and variants */ + /* + * We only care about %s and variants + * as well as %p[sS] if delta is non-zero + */ for (i = 0; p[i]; i++) { if (i + 1 >= iter->fmt_size) { /* @@ -3907,6 +3913,11 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, } if (p[i+j] == 's') break; + + if (text_delta && p[i+1] == 'p' && + ((p[i+2] == 's' || p[i+2] == 'S'))) + break; + star = false; } j = 0; @@ -3920,6 +3931,24 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, iter->fmt[i] = '\0'; 
trace_seq_vprintf(&iter->seq, iter->fmt, ap); + /* Add delta to %pS pointers */ + if (p[i+1] == 'p') { + unsigned long addr; + char fmt[4]; + + fmt[0] = '%'; + fmt[1] = 'p'; + fmt[2] = p[i+2]; /* Either %ps or %pS */ + fmt[3] = '\0'; + + addr = va_arg(ap, unsigned long); + addr += text_delta; + trace_seq_printf(&iter->seq, fmt, (void *)addr); + + p += i + 3; + continue; + } + /* * If iter->seq is full, the above call no longer guarantees * that ap is in sync with fmt processing, and further calls @@ -3938,6 +3967,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, /* The ap now points to the string data of the %s */ str = va_arg(ap, const char *); + good = trace_safe_str(iter, str, star, len); + + /* Could be from the last boot */ + if (data_delta && !good) { + str += data_delta; + good = trace_safe_str(iter, str, star, len); + } + /* * If you hit this warning, it is likely that the * trace event in question used %s on a string that @@ -3947,8 +3984,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, * instead. See samples/trace_events/trace-events-sample.h * for reference. */ - if (WARN_ONCE(!trace_safe_str(iter, str, star, len), - "fmt: '%s' current_buffer: '%s'", + if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'", fmt, seq_buf_str(&iter->seq.seq))) { int ret; From 5ca858b1fc73e5a176c4d9dbaa513fee5ca29d92 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Oct 2024 10:49:25 -0400 Subject: [PATCH 114/216] tracing: Fix trace_check_vprintf() when tp_printk is used [ Upstream commit 50a3242d84ee1625b0bfef29b95f935958dccfbe ] When the tp_printk kernel command line is used, the trace events go directly to printk(). It is still checked via the trace_check_vprintf() function to make sure the pointers of the trace event are legit. 
The addition of reading buffers from previous boots required adding a delta between the addresses of the previous boot and the current boot so that the pointers in the old buffer can still be used. But this required adding a trace_array pointer to acquire the delta offsets. The tp_printk code does not provide a trace_array (tr) pointer, so when the offsets were examined, a NULL pointer dereference happened and the kernel crashed. If the trace_array does not exist, just default the delta offsets to zero, as that also means the trace event is not being read from a previous boot. Link: https://lore.kernel.org/all/Zv3z5UsG_jsO9_Tb@aschofie-mobl2.lan/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241003104925.4e1b1fd9@gandalf.local.home Fixes: 07714b4bb3f98 ("tracing: Handle old buffer mappings for event strings and functions") Reported-by: Alison Schofield Tested-by: Alison Schofield Signed-off-by: Steven Rostedt (Google) Stable-dep-of: afd2627f727b ("tracing: Check "%s" dereference via the field and not the TP_printk format") Signed-off-by: Sasha Levin (cherry picked from commit 55841e8820b9b5f2c6fb5e7bb072d688b1cc11bd) --- kernel/trace/trace.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a45efc4e4171..addc1b326c79d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3858,8 +3858,8 @@ static void test_can_verify(void) void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, va_list ap) { - long text_delta = iter->tr->text_delta; - long data_delta = iter->tr->data_delta; + long text_delta = 0; + long data_delta = 0; const char *p = fmt; const char *str; bool good; @@ -3871,6 +3871,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, if (static_branch_unlikely(&trace_no_verify)) goto print; + /* + * When the kernel is booted with the tp_printk command line + * parameter, trace events go directly through 
to printk(). + * It also is checked by this function, but it does not + * have an associated trace_array (tr) for it. + */ + if (iter->tr) { + text_delta = iter->tr->text_delta; + data_delta = iter->tr->data_delta; + } + /* Don't bother checking when doing a ftrace_dump() */ if (iter->fmt == static_fmt_buf) goto print; From f0827dc23166ebe52aac23e416136fe0036b39e4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Dec 2024 21:41:22 -0500 Subject: [PATCH 115/216] tracing: Check "%s" dereference via the field and not the TP_printk format [ Upstream commit afd2627f727b89496d79a6b934a025fc916d4ded ] The TP_printk() portion of a trace event is executed at the time an event is read from the trace. This can happen seconds, minutes, hours, days, months, years possibly later since the event was recorded. If the print format contains a dereference to a string via "%s", and that string was allocated, there's a chance that string could be freed before it is read by the trace file. To protect against such bugs, there are two functions that verify the event. The first one is test_event_printk(), which is called when the event is created. It reads the TP_printk() format as well as its arguments to make sure nothing may be dereferencing a pointer that was not copied into the ring buffer along with the event. If it is, it will trigger a WARN_ON(). For strings that use "%s", it is not so easy. The string may not reside in the ring buffer but may still be valid. Strings that are static and part of the kernel proper which will not be freed for the life of the running system, are safe to dereference. But to know if it is a pointer to a static string or to something on the heap cannot be determined until the event is triggered. This brings us to the second function that tests for the bad dereferencing of strings, trace_check_vprintf(). It would walk through the printf format looking for "%s", and when it finds it, it would validate that the pointer is safe to read.
If not, it would produce a WARN_ON() as well and write into the ring buffer "[UNSAFE-MEMORY]". The problem with this is how it used va_list to have vsnprintf() handle all the cases that it didn't need to check. Instead of re-implementing vsnprintf(), it would make a copy of the format up to the %s part, and call vsnprintf() with the current va_list ap variable, where the ap would then be ready to point at the string in question. For architectures that passed va_list by reference this was possible. For architectures that passed it by copy it was not. A test_can_verify() function was used to differentiate between the two, and if it wasn't possible, it would disable it. Even for architectures where this was feasible, it was a stretch to rely on such a method that is undocumented, and could cause issues later on with new optimizations of the compiler. Instead, the first function test_event_printk() was updated to look at "%s" as well. If the "%s" argument is a pointer outside the event in the ring buffer, it would find the field type of the event that is the problem and mark the structure with a new flag called "needs_test". The event itself will be marked by TRACE_EVENT_FL_TEST_STR to let it be known that this event has a field that needs to be verified before the event can be printed using the printf format. When the event fields are created from the field type structure, the fields would copy the field type's "needs_test" value. Finally, before being printed, a new function ignore_event() is called which will check if the event has the TEST_STR flag set (if not, it returns false). If the flag is set, it then iterates through the event's fields looking for the ones that have the "needs_test" flag set. Then it uses the offset field from the field structure to find the pointer in the ring buffer event.
It runs the tests to make sure that pointer is safe to print and if not, it triggers the WARN_ON() and also adds to the trace output that the event in question has an unsafe memory access. The ignore_event() makes the trace_check_vprintf() obsolete so it is removed. Link: https://lore.kernel.org/all/CAHk-=wh3uOnqnZPpR0PeLZZtyWbZLboZ7cHLCKRWsocvs9Y7hQ@mail.gmail.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Al Viro Cc: Linus Torvalds Link: https://lore.kernel.org/20241217024720.848621576@goodmis.org Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin (cherry picked from commit f452f397f9a6a605989e4151078ab76b41d490cc) --- include/linux/trace_events.h | 6 +- kernel/trace/trace.c | 255 ++++++++--------------------------- kernel/trace/trace.h | 6 +- kernel/trace/trace_events.c | 32 +++-- kernel/trace/trace_output.c | 6 +- 5 files changed, 88 insertions(+), 217 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df2524fff33a..aa1bc41726620 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -279,7 +279,8 @@ struct trace_event_fields { const char *name; const int size; const int align; - const int is_signed; + const unsigned int is_signed:1; + unsigned int needs_test:1; const int filter_type; const int len; }; @@ -331,6 +332,7 @@ enum { TRACE_EVENT_FL_EPROBE_BIT, TRACE_EVENT_FL_FPROBE_BIT, TRACE_EVENT_FL_CUSTOM_BIT, + TRACE_EVENT_FL_TEST_STR_BIT, }; /* @@ -348,6 +350,7 @@ enum { * CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint) * This is set when the custom event has not been attached * to a tracepoint yet, then it is cleared when it is. 
+ * TEST_STR - The event has a "%s" that points to a string outside the event */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -361,6 +364,7 @@ enum { TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT), TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT), TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT), + TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT), }; #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index addc1b326c79d..9d9af60b238e2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3760,17 +3760,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter) } /* Returns true if the string is safe to dereference from an event */ -static bool trace_safe_str(struct trace_iterator *iter, const char *str, - bool star, int len) +static bool trace_safe_str(struct trace_iterator *iter, const char *str) { unsigned long addr = (unsigned long)str; struct trace_event *trace_event; struct trace_event_call *event; - /* Ignore strings with no length */ - if (star && !len) - return true; - /* OK if part of the event data */ if ((addr >= (unsigned long)iter->ent) && (addr < (unsigned long)iter->ent + iter->ent_size)) @@ -3810,181 +3805,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str, return false; } -static DEFINE_STATIC_KEY_FALSE(trace_no_verify); - -static int test_can_verify_check(const char *fmt, ...) -{ - char buf[16]; - va_list ap; - int ret; - - /* - * The verifier is dependent on vsnprintf() modifies the va_list - * passed to it, where it is sent as a reference. Some architectures - * (like x86_32) passes it by value, which means that vsnprintf() - * does not modify the va_list passed to it, and the verifier - * would then need to be able to understand all the values that - * vsnprintf can use. If it is passed by value, then the verifier - * is disabled. 
- */ - va_start(ap, fmt); - vsnprintf(buf, 16, "%d", ap); - ret = va_arg(ap, int); - va_end(ap); - - return ret; -} - -static void test_can_verify(void) -{ - if (!test_can_verify_check("%d %d", 0, 1)) { - pr_info("trace event string verifier disabled\n"); - static_branch_inc(&trace_no_verify); - } -} - /** - * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer + * ignore_event - Check dereferenced fields while writing to the seq buffer * @iter: The iterator that holds the seq buffer and the event being printed - * @fmt: The format used to print the event - * @ap: The va_list holding the data to print from @fmt. * - * This writes the data into the @iter->seq buffer using the data from - * @fmt and @ap. If the format has a %s, then the source of the string - * is examined to make sure it is safe to print, otherwise it will - * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string - * pointer. + * At boot up, test_event_printk() will flag any event that dereferences + * a string with "%s" that does exist in the ring buffer. It may still + * be valid, as the string may point to a static string in the kernel + * rodata that never gets freed. But if the string pointer is pointing + * to something that was allocated, there's a chance that it can be freed + * by the time the user reads the trace. This would cause a bad memory + * access by the kernel and possibly crash the system. + * + * This function will check if the event has any fields flagged as needing + * to be checked at runtime and perform those checks. + * + * If it is found that a field is unsafe, it will write into the @iter->seq + * a message stating what was found to be unsafe. + * + * @return: true if the event is unsafe and should be ignored, + * false otherwise. 
*/ -void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, - va_list ap) +bool ignore_event(struct trace_iterator *iter) { - long text_delta = 0; - long data_delta = 0; - const char *p = fmt; - const char *str; - bool good; - int i, j; + struct ftrace_event_field *field; + struct trace_event *trace_event; + struct trace_event_call *event; + struct list_head *head; + struct trace_seq *seq; + const void *ptr; - if (WARN_ON_ONCE(!fmt)) - return; + trace_event = ftrace_find_event(iter->ent->type); - if (static_branch_unlikely(&trace_no_verify)) - goto print; + seq = &iter->seq; - /* - * When the kernel is booted with the tp_printk command line - * parameter, trace events go directly through to printk(). - * It also is checked by this function, but it does not - * have an associated trace_array (tr) for it. - */ - if (iter->tr) { - text_delta = iter->tr->text_delta; - data_delta = iter->tr->data_delta; + if (!trace_event) { + trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); + return true; } - /* Don't bother checking when doing a ftrace_dump() */ - if (iter->fmt == static_fmt_buf) - goto print; - - while (*p) { - bool star = false; - int len = 0; - - j = 0; - - /* - * We only care about %s and variants - * as well as %p[sS] if delta is non-zero - */ - for (i = 0; p[i]; i++) { - if (i + 1 >= iter->fmt_size) { - /* - * If we can't expand the copy buffer, - * just print it. 
- */ - if (!trace_iter_expand_format(iter)) - goto print; - } - - if (p[i] == '\\' && p[i+1]) { - i++; - continue; - } - if (p[i] == '%') { - /* Need to test cases like %08.*s */ - for (j = 1; p[i+j]; j++) { - if (isdigit(p[i+j]) || - p[i+j] == '.') - continue; - if (p[i+j] == '*') { - star = true; - continue; - } - break; - } - if (p[i+j] == 's') - break; - - if (text_delta && p[i+1] == 'p' && - ((p[i+2] == 's' || p[i+2] == 'S'))) - break; - - star = false; - } - j = 0; - } - /* If no %s found then just print normally */ - if (!p[i]) - break; - - /* Copy up to the %s, and print that */ - strncpy(iter->fmt, p, i); - iter->fmt[i] = '\0'; - trace_seq_vprintf(&iter->seq, iter->fmt, ap); + event = container_of(trace_event, struct trace_event_call, event); + if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) + return false; - /* Add delta to %pS pointers */ - if (p[i+1] == 'p') { - unsigned long addr; - char fmt[4]; + head = trace_get_fields(event); + if (!head) { + trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", + trace_event_name(event)); + return true; + } - fmt[0] = '%'; - fmt[1] = 'p'; - fmt[2] = p[i+2]; /* Either %ps or %pS */ - fmt[3] = '\0'; + /* Offsets are from the iter->ent that points to the raw event */ + ptr = iter->ent; - addr = va_arg(ap, unsigned long); - addr += text_delta; - trace_seq_printf(&iter->seq, fmt, (void *)addr); + list_for_each_entry(field, head, link) { + const char *str; + bool good; - p += i + 3; + if (!field->needs_test) continue; - } - /* - * If iter->seq is full, the above call no longer guarantees - * that ap is in sync with fmt processing, and further calls - * to va_arg() can return wrong positional arguments. - * - * Ensure that ap is no longer used in this case. 
- */ - if (iter->seq.full) { - p = ""; - break; - } - - if (star) - len = va_arg(ap, int); - - /* The ap now points to the string data of the %s */ - str = va_arg(ap, const char *); + str = *(const char **)(ptr + field->offset); - good = trace_safe_str(iter, str, star, len); - - /* Could be from the last boot */ - if (data_delta && !good) { - str += data_delta; - good = trace_safe_str(iter, str, star, len); - } + good = trace_safe_str(iter, str); /* * If you hit this warning, it is likely that the @@ -3995,44 +3878,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, * instead. See samples/trace_events/trace-events-sample.h * for reference. */ - if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'", - fmt, seq_buf_str(&iter->seq.seq))) { - int ret; - - /* Try to safely read the string */ - if (star) { - if (len + 1 > iter->fmt_size) - len = iter->fmt_size - 1; - if (len < 0) - len = 0; - ret = copy_from_kernel_nofault(iter->fmt, str, len); - iter->fmt[len] = 0; - star = false; - } else { - ret = strncpy_from_kernel_nofault(iter->fmt, str, - iter->fmt_size); - } - if (ret < 0) - trace_seq_printf(&iter->seq, "(0x%px)", str); - else - trace_seq_printf(&iter->seq, "(0x%px:%s)", - str, iter->fmt); - str = "[UNSAFE-MEMORY]"; - strcpy(iter->fmt, "%s"); - } else { - strncpy(iter->fmt, p + i, j + 1); - iter->fmt[j+1] = '\0'; + if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", + trace_event_name(event), field->name)) { + trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", + trace_event_name(event), field->name); + return true; } - if (star) - trace_seq_printf(&iter->seq, iter->fmt, len, str); - else - trace_seq_printf(&iter->seq, iter->fmt, str); - - p += i + j + 1; } - print: - if (*p) - trace_seq_vprintf(&iter->seq, p, ap); + return false; } const char *trace_event_format(struct trace_iterator *iter, const char *fmt) @@ -10577,8 +10430,6 @@ __init static int tracer_alloc_buffers(void) register_snapshot_cmd(); - 
test_can_verify(); - return 0; out_free_pipe_cpumask: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3db42bae73f8e..e45756f1ac2b1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -644,9 +644,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, bool trace_is_tracepoint_string(const char *str); const char *trace_event_format(struct trace_iterator *iter, const char *fmt); -void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, - va_list ap) __printf(2, 0); char *trace_iter_expand_format(struct trace_iterator *iter); +bool ignore_event(struct trace_iterator *iter); int trace_empty(struct trace_iterator *iter); @@ -1323,7 +1322,8 @@ struct ftrace_event_field { int filter_type; int offset; int size; - int is_signed; + unsigned int is_signed:1; + unsigned int needs_test:1; int len; }; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2ee59b217d7d2..9d22745cdea5a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system) } static struct ftrace_event_field * -__find_event_field(struct list_head *head, char *name) +__find_event_field(struct list_head *head, const char *name) { struct ftrace_event_field *field; @@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name) static int __trace_define_field(struct list_head *head, const char *type, const char *name, int offset, int size, - int is_signed, int filter_type, int len) + int is_signed, int filter_type, int len, + int need_test) { struct ftrace_event_field *field; @@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field->offset = offset; field->size = size; field->is_signed = is_signed; + field->needs_test = need_test; field->len = len; list_add(&field->link, head); @@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type, head = 
trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, - is_signed, filter_type, 0); + is_signed, filter_type, 0, 0); } EXPORT_SYMBOL_GPL(trace_define_field); static int trace_define_field_ext(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, - int filter_type, int len) + int filter_type, int len, int need_test) { struct list_head *head; @@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, - is_signed, filter_type, len); + is_signed, filter_type, len, need_test); } #define __generic_field(type, item, filter_type) \ ret = __trace_define_field(&ftrace_generic_fields, #type, \ #item, 0, 0, is_signed_type(type), \ - filter_type, 0); \ + filter_type, 0, 0); \ if (ret) \ return ret; @@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ - is_signed_type(type), FILTER_OTHER, 0); \ + is_signed_type(type), FILTER_OTHER, \ + 0, 0); \ if (ret) \ return ret; @@ -332,6 +335,7 @@ static bool process_pointer(const char *fmt, int len, struct trace_event_call *c /* Return true if the string is safe */ static bool process_string(const char *fmt, int len, struct trace_event_call *call) { + struct trace_event_fields *field; const char *r, *e, *s; e = fmt + len; @@ -384,8 +388,16 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca if (process_pointer(fmt, len, call)) return true; - /* Make sure the field is found, and consider it OK for now if it is */ - return find_event_field(fmt, call) != NULL; + /* Make sure the field is found */ + field = find_event_field(fmt, call); + if (!field) + return false; + + /* Test this field's string before printing the event */ + call->flags |= TRACE_EVENT_FL_TEST_STR; + 
field->needs_test = 1; + + return true; } /* @@ -2564,7 +2576,7 @@ event_define_fields(struct trace_event_call *call) ret = trace_define_field_ext(call, field->type, field->name, offset, field->size, field->is_signed, field->filter_type, - field->len); + field->len, field->needs_test); if (WARN_ON_ONCE(ret)) { pr_err("error code is %d\n", ret); break; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index db575094c4982..2b948d35fb59e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep); void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) { + struct trace_seq *s = &iter->seq; va_list ap; + if (ignore_event(iter)) + return; + va_start(ap, fmt); - trace_check_vprintf(iter, trace_event_format(iter, fmt), ap); + trace_seq_vprintf(s, trace_event_format(iter, fmt), ap); va_end(ap); } EXPORT_SYMBOL(trace_event_printf); From 84f007b2741dabf794cb78280fd564f70f4fd190 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 27 May 2024 23:11:36 -0700 Subject: [PATCH 116/216] RDMA/bnxt_re: Allow MSN table capability check [ Upstream commit 8d310ba845827a38fcd463d86bfe3b730ce7ab8f ] FW reports the HW capability to use PSN table or MSN table and driver/library need to select it based on this capability. Use the new capability instead of the older capability check for HW retransmission while handling the MSN/PSN table. FW report zero (PSN table) for older adapters to maintain backward compatibility. Also, Updated the FW interface structures to handle the new fields. 
Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1716876697-25970-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Stable-dep-of: eb867d797d29 ("RDMA/bnxt_re: Remove always true dattr validity check") Signed-off-by: Sasha Levin (cherry picked from commit c91ae7c12d6f805a670925043c6eda13114fb78f) --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 12 ++++----- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.h | 6 +++++ drivers/infiniband/hw/bnxt_re/qplib_sp.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_sp.h | 1 + drivers/infiniband/hw/bnxt_re/roce_hsi.h | 30 ++++++++++++++++++++++- 6 files changed, 44 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b624c255eee6f..3e07500dcbcfa 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -981,7 +981,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u16 nsge; if (res->dattr) - qp->dev_cap_flags = res->dattr->dev_cap_flags; + qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2); sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, @@ -999,7 +999,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); - if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) { + if (qp->is_host_msn_tbl) { psn_sz = sizeof(struct sq_msn_search); qp->msn = 0; } @@ -1013,7 +1013,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.aux_depth = psn_sz ? 
bnxt_qplib_set_sq_size(sq, qp->wqe_mode) : 0; /* Update msn tbl size */ - if (BNXT_RE_HW_RETX(qp->dev_cap_flags) && psn_sz) { + if (qp->is_host_msn_tbl && psn_sz) { hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); qp->msn_tbl_sz = hwq_attr.aux_depth; qp->msn = 0; @@ -1638,7 +1638,7 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, if (!swq->psn_search) return; /* Handle MSN differently on cap flags */ - if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) { + if (qp->is_host_msn_tbl) { bnxt_qplib_fill_msn_search(qp, wqe, swq); return; } @@ -1820,7 +1820,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } swq = bnxt_qplib_get_swqe(sq, &wqe_idx); - bnxt_qplib_pull_psn_buff(qp, sq, swq, BNXT_RE_HW_RETX(qp->dev_cap_flags)); + bnxt_qplib_pull_psn_buff(qp, sq, swq, qp->is_host_msn_tbl); idx = 0; swq->slot_idx = hwq->prod; @@ -2010,7 +2010,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, rc = -EINVAL; goto done; } - if (!BNXT_RE_HW_RETX(qp->dev_cap_flags) || msn_update) { + if (!qp->is_host_msn_tbl || msn_update) { swq->next_psn = sq->psn & BTH_PSN_MASK; bnxt_qplib_fill_psn_search(qp, wqe, swq); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 5d4c49089a20f..3a15ca7feb2b7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -340,7 +340,7 @@ struct bnxt_qplib_qp { struct list_head rq_flush; u32 msn; u32 msn_tbl_sz; - u16 dev_cap_flags; + bool is_host_msn_tbl; }; #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index f9e7aa3757cfb..c2152122a4329 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -523,6 +523,12 @@ static inline bool _is_hw_retx_supported(u16 dev_cap_flags) #define BNXT_RE_HW_RETX(a) _is_hw_retx_supported((a)) +static inline bool 
_is_host_msn_table(u16 dev_cap_ext_flags2) +{ + return (dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK) == + CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE; +} + static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx) { return cctx->modes.dbr_pacing; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 0b98577cd7082..420f8613bcd51 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -165,6 +165,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_sgid = le32_to_cpu(sb->max_gid); attr->max_sgid = min_t(u32, BNXT_QPLIB_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid); attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); + attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2); bnxt_qplib_query_version(rcfw, attr->fw_ver); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 755765e68eaab..2f16f3db093ea 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -73,6 +73,7 @@ struct bnxt_qplib_dev_attr { u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ]; bool is_atomic; u16 dev_cap_flags; + u16 dev_cap_flags2; u32 max_dpi; }; diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 2909608f4b5de..cb4e7e19fbaf0 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -2157,8 +2157,36 @@ struct creq_query_func_resp_sb { __le32 tqm_alloc_reqs[12]; __le32 max_dpi; u8 max_sge_var_wqe; - u8 reserved_8; + u8 dev_cap_ext_flags; + #define CREQ_QUERY_FUNC_RESP_SB_ATOMIC_OPS_NOT_SUPPORTED 0x1UL + #define CREQ_QUERY_FUNC_RESP_SB_DRV_VERSION_RGTR_SUPPORTED 0x2UL + #define CREQ_QUERY_FUNC_RESP_SB_CREATE_QP_BATCH_SUPPORTED 0x4UL + #define CREQ_QUERY_FUNC_RESP_SB_DESTROY_QP_BATCH_SUPPORTED 0x8UL + #define 
CREQ_QUERY_FUNC_RESP_SB_ROCE_STATS_EXT_CTX_SUPPORTED 0x10UL + #define CREQ_QUERY_FUNC_RESP_SB_CREATE_SRQ_SGE_SUPPORTED 0x20UL + #define CREQ_QUERY_FUNC_RESP_SB_FIXED_SIZE_WQE_DISABLED 0x40UL + #define CREQ_QUERY_FUNC_RESP_SB_DCN_SUPPORTED 0x80UL __le16 max_inline_data_var_wqe; + __le32 start_qid; + u8 max_msn_table_size; + u8 reserved8_1; + __le16 dev_cap_ext_flags_2; + #define CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED 0x1UL + #define CREQ_QUERY_FUNC_RESP_SB_CHANGE_UDP_SRC_PORT_WQE_SUPPORTED 0x2UL + #define CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED 0x4UL + #define CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED 0x8UL + #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK 0x30UL + #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_SFT 4 + #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_PSN_TABLE (0x0UL << 4) + #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE (0x1UL << 4) + #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4) + #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \ + CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE + __le16 max_xp_qp_size; + __le16 create_qp_batch_size; + __le16 destroy_qp_batch_size; + __le16 reserved16; + __le64 reserved64; }; /* cmdq_set_func_resources (size:448b/56B) */ From dba98dfea2a89f37b7151ecd6ce9e972eba544e1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 Nov 2024 15:10:31 +0200 Subject: [PATCH 117/216] RDMA/bnxt_re: Remove always true dattr validity check [ Upstream commit eb867d797d294a00a092b5027d08439da68940b2 ] res->dattr is always valid at this point as it was initialized during device addition in bnxt_re_add_device(). 
This change is fixing the following smatch error: drivers/infiniband/hw/bnxt_re/qplib_fp.c:1090 bnxt_qplib_create_qp() error: we previously assumed 'res->dattr' could be null (see line 985) Fixes: 07f830ae4913 ("RDMA/bnxt_re: Adds MSN table capability for Gen P7 adapters") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202411222329.YTrwonWi-lkp@intel.com/ Link: https://patch.msgid.link/be0d8836b64cba3e479fbcbca717acad04aae02e.1732626579.git.leonro@nvidia.com Acked-by: Selvin Xavier Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 5d1d7522cf8226e788dd193d402459efd5beff12) --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 3e07500dcbcfa..4098e01666d16 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -980,9 +980,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u32 tbl_indx; u16 nsge; - if (res->dattr) - qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2); - + qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2); sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_QP, From 856c30fd1ea1985feffdcc7d4a97bd5f56f2d5b4 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 3 Dec 2024 15:45:37 +0200 Subject: [PATCH 118/216] RDMA/mlx5: Enforce same type port association for multiport RoCE [ Upstream commit e05feab22fd7dabcd6d272c4e2401ec1acdfdb9b ] Different core device types such as PFs and VFs shouldn't be affiliated together since they have different capabilities, fix that by enforcing type check before doing the affiliation. 
Fixes: 32f69e4be269 ("{net, IB}/mlx5: Manage port association for multiport RoCE") Reviewed-by: Mark Bloch Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/88699500f690dff1c1852c1ddb71f8a1cc8b956e.1733233480.git.leonro@nvidia.com Reviewed-by: Mateusz Polchlopek Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 25e6e9da69263824059a91a83c9d70d3fdc0ab23) --- drivers/infiniband/hw/mlx5/main.c | 6 ++++-- include/linux/mlx5/driver.h | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c510484e024b1..ada7dbf8eb1cf 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3372,7 +3372,8 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list, list) { if (dev->sys_image_guid == mpi->sys_image_guid && - (mlx5_core_native_port_num(mpi->mdev) - 1) == i) { + (mlx5_core_native_port_num(mpi->mdev) - 1) == i && + mlx5_core_same_coredev_type(dev->mdev, mpi->mdev)) { bound = mlx5_ib_bind_slave_port(dev, mpi); } @@ -4406,7 +4407,8 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev, mutex_lock(&mlx5_ib_multiport_mutex); list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) { - if (dev->sys_image_guid == mpi->sys_image_guid) + if (dev->sys_image_guid == mpi->sys_image_guid && + mlx5_core_same_coredev_type(dev->mdev, mpi->mdev)) bound = mlx5_ib_bind_slave_port(dev, mpi); if (bound) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ffb98bc43b2db..38a8ff9c685cb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1225,6 +1225,12 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_VF; } +static inline bool mlx5_core_same_coredev_type(const struct mlx5_core_dev *dev1, + const struct mlx5_core_dev *dev2) +{ + return 
dev1->coredev_type == dev2->coredev_type; +} + static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; From 2191adf674ef18fba6a515548bef49b0d5842c2b Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 4 Dec 2024 13:24:13 +0530 Subject: [PATCH 119/216] RDMA/bnxt_re: Avoid initializing the software queue for user queues [ Upstream commit 5effcacc8a8f3eb2a9f069d7e81a9ac793598dfb ] Software Queues to hold the WRs needs to be created for only kernel queues. Avoid allocating the unnecessary memory for user Queues. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Fixes: 159fb4ceacd7 ("RDMA/bnxt_re: introduce a function to allocate swq") Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241204075416.478431-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit a5092b138e1c89c0b6f52daabaac03d7d109485e) --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 42 +++++++++++++----------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 4098e01666d16..d38e7880cebb5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -639,13 +639,6 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr); if (rc) return rc; - - srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq), - GFP_KERNEL); - if (!srq->swq) { - rc = -ENOMEM; - goto fail; - } srq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_SRQ, @@ -674,9 +667,17 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, spin_lock_init(&srq->lock); srq->start_idx = 0; srq->last_idx = srq->hwq.max_elements - 1; - for (idx = 0; idx < srq->hwq.max_elements; idx++) - srq->swq[idx].next_idx = idx + 1; - srq->swq[srq->last_idx].next_idx = -1; + if 
(!srq->hwq.is_user) { + srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq), + GFP_KERNEL); + if (!srq->swq) { + rc = -ENOMEM; + goto fail; + } + for (idx = 0; idx < srq->hwq.max_elements; idx++) + srq->swq[idx].next_idx = idx + 1; + srq->swq[srq->last_idx].next_idx = -1; + } srq->id = le32_to_cpu(resp.xid); srq->dbinfo.hwq = &srq->hwq; @@ -1022,13 +1023,14 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rc) return rc; - rc = bnxt_qplib_alloc_init_swq(sq); - if (rc) - goto fail_sq; - - if (psn_sz) - bnxt_qplib_init_psn_ptr(qp, psn_sz); + if (!sq->hwq.is_user) { + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) + goto fail_sq; + if (psn_sz) + bnxt_qplib_init_psn_ptr(qp, psn_sz); + } req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); @@ -1054,9 +1056,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) goto sq_swq; - rc = bnxt_qplib_alloc_init_swq(rq); - if (rc) - goto fail_rq; + if (!rq->hwq.is_user) { + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) + goto fail_rq; + } req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; From 8df35e036cd0841cc651aae33cf35ffa41f7c3e1 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Wed, 4 Dec 2024 13:24:14 +0530 Subject: [PATCH 120/216] RDMA/bnxt_re: Avoid sending the modify QP workaround for latest adapters [ Upstream commit 064c22408a73b9e945139b64614c534cbbefb591 ] The workaround to modify the UD QP from RTS to RTS is required only for older adapters. Issuing this for latest adapters can cause some unexpected behavior.
Fix it Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241204075416.478431-4-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit a0ceed736c88d4f105f5951612f45ea68b2c4c35) --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index df58972606014..fb6f15bb9d4f7 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2710,7 +2710,8 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, wr = wr->next; } bnxt_qplib_post_send_db(&qp->qplib_qp); - bnxt_ud_qp_hw_stall_workaround(qp); + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx)) + bnxt_ud_qp_hw_stall_workaround(qp); spin_unlock_irqrestore(&qp->sq_lock, flags); return rc; } @@ -2822,7 +2823,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, wr = wr->next; } bnxt_qplib_post_send_db(&qp->qplib_qp); - bnxt_ud_qp_hw_stall_workaround(qp); + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx)) + bnxt_ud_qp_hw_stall_workaround(qp); spin_unlock_irqrestore(&qp->sq_lock, flags); return rc; From 3e3aff56b3b52651bff3d924651709b5e3579d20 Mon Sep 17 00:00:00 2001 From: Robert Beckett Date: Tue, 12 Nov 2024 19:50:00 +0000 Subject: [PATCH 121/216] nvme-pci: 512 byte aligned dma pool segment quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ebefac5647968679f6ef5803e5d35a71997d20fa ] We initially introduced a quick fix limiting the queue depth to 1 as experimentation showed that it fixed data corruption on 64GB steamdecks. Further experimentation revealed corruption only happens when the last PRP data element aligns to the end of the page boundary. 
The device appears to treat this as a PRP chain to a new list instead of the data element that it actually is. This implementation is in violation of the spec. Encountering this errata with the Linux driver requires the host request a 128k transfer and coincidentally be handed the last small pool dma buffer within a page. The QD1 quirk effectively works around this because the last data PRP always was at a 248 byte offset from the page start, so it never appeared at the end of the page, but comes at the expense of throttling IO and wasting the remainder of the PRP page beyond 256 bytes. Also to note, the MDTS on these devices is small enough that the "large" prp pool can hold enough PRP elements to never reach the end, so that pool is not a problem either. Introduce a new quirk to ensure the small pool is always aligned such that the last PRP element can't appear at the end of the page. This comes at the expense of wasting 256 bytes per small pool page allocated. Link: https://lore.kernel.org/linux-nvme/20241113043151.GA20077@lst.de/T/#u Fixes: 83bdfcbdbe5d ("nvme-pci: qdepth 1 quirk") Cc: Paweł Anikiel Signed-off-by: Robert Beckett Signed-off-by: Keith Busch Signed-off-by: Sasha Levin (cherry picked from commit 347654387bb15bb5ddfe161fcd7df1b44e1f53d7) --- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bddc068d58c7e..e867ac859a878 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -172,6 +172,11 @@ enum nvme_quirks { * MSI (but not MSI-X) interrupts are broken and never fire.
*/ NVME_QUIRK_BROKEN_MSI = (1 << 21), + + /* + * Align dma pool segment size to 512 bytes + */ + NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d525fa1229d79..52c8fd3d5c479 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2653,15 +2653,20 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown) static int nvme_setup_prp_pools(struct nvme_dev *dev) { + size_t small_align = 256; + dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, NVME_CTRL_PAGE_SIZE, NVME_CTRL_PAGE_SIZE, 0); if (!dev->prp_page_pool) return -ENOMEM; + if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512) + small_align = 512; + /* Optimisation for I/Os between 4k and 128k */ dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev, - 256, 256, 0); + 256, small_align, 0); if (!dev->prp_small_pool) { dma_pool_destroy(dev->prp_page_pool); return -ENOMEM; @@ -3403,7 +3408,7 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */ - .driver_data = NVME_QUIRK_QDEPTH_ONE }, + .driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, }, { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_BOGUS_NID, }, From da864b91768c1c9865cf917e7568fc6136c31fec Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 11 Dec 2024 14:09:27 +0530 Subject: [PATCH 122/216] RDMA/bnxt_re: Fix the check for 9060 condition [ Upstream commit 38651476e46e088598354510502c383e932e2297 ] The check for 9060 condition should only be made for legacy chips. 
Fixes: 9152e0b722b2 ("RDMA/bnxt_re: HW workarounds for handling specific conditions") Reviewed-by: Kashyap Desai Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241211083931.968831-2-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 183a96174cabed76440512ceaf1465bbb0a65925) --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index d38e7880cebb5..8997f359b58b3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2536,10 +2536,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, bnxt_qplib_add_flush_qp(qp); } else { /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sq->swq_last, - cqe_sq_cons)) { - *lib_qp = qp; - goto out; + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) { + if (do_wa9060(qp, cq, cq_cons, sq->swq_last, + cqe_sq_cons)) { + *lib_qp = qp; + goto out; + } } if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { cqe->status = CQ_REQ_STATUS_OK; From 0406239917ace92d8b6787d9303d24834bea5a03 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Wed, 11 Dec 2024 14:09:28 +0530 Subject: [PATCH 123/216] RDMA/bnxt_re: Add check for path mtu in modify_qp [ Upstream commit 798653a0ee30d3cd495099282751c0f248614ae7 ] When RDMA app configures path MTU, add a check in modify_qp verb to make sure that it doesn't go beyond interface MTU. If this check fails, driver will fail the modify_qp verb. 
Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Reviewed-by: Kalesh AP Signed-off-by: Saravanan Vajravel Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241211083931.968831-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 14f66ac898c9e74fa8d67659cd131dd48bcccf32) --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 26 +++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index fb6f15bb9d4f7..9bf00fb666d76 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2055,18 +2055,20 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, } } - if (qp_attr_mask & IB_QP_PATH_MTU) { - qp->qplib_qp.modify_flags |= - CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; - qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu); - qp->qplib_qp.mtu = ib_mtu_enum_to_int(qp_attr->path_mtu); - } else if (qp_attr->qp_state == IB_QPS_RTR) { - qp->qplib_qp.modify_flags |= - CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; - qp->qplib_qp.path_mtu = - __from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu)); - qp->qplib_qp.mtu = - ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); + if (qp_attr->qp_state == IB_QPS_RTR) { + enum ib_mtu qpmtu; + + qpmtu = iboe_get_mtu(rdev->netdev->mtu); + if (qp_attr_mask & IB_QP_PATH_MTU) { + if (ib_mtu_enum_to_int(qp_attr->path_mtu) > + ib_mtu_enum_to_int(qpmtu)) + return -EINVAL; + qpmtu = qp_attr->path_mtu; + } + + qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; + qp->qplib_qp.path_mtu = __from_ib_mtu(qpmtu); + qp->qplib_qp.mtu = ib_mtu_enum_to_int(qpmtu); } if (qp_attr_mask & IB_QP_TIMEOUT) { From 20ca2e3214d545e98b96647e208c46963a065604 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 11 Dec 2024 14:09:31 +0530 Subject: [PATCH 124/216] RDMA/bnxt_re: Fix reporting hw_ver in query_device 
[ Upstream commit 7179fe0074a3c962e43a9e51169304c4911989ed ] Driver currently populates subsystem_device id in the "hw_ver" field of ib_attr structure in query_device. Updated to populate PCI revision ID. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Reviewed-by: Preethi G Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241211083931.968831-6-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 38b49312da2d82cee9ec6f56cb76f7d4696046ec) --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9bf00fb666d76..9e8f86f488019 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -147,7 +147,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->vendor_id = rdev->en_dev->pdev->vendor; ib_attr->vendor_part_id = rdev->en_dev->pdev->device; - ib_attr->hw_ver = rdev->en_dev->pdev->subsystem_device; + ib_attr->hw_ver = rdev->en_dev->pdev->revision; ib_attr->max_qp = dev_attr->max_qp; ib_attr->max_qp_wr = dev_attr->max_qp_wqes; ib_attr->device_cap_flags = From d8dc6f014e104f1de5b6e5688f33621937531811 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 17 Dec 2024 15:56:45 +0530 Subject: [PATCH 125/216] RDMA/bnxt_re: Fix max_qp_wrs reported [ Upstream commit 40be32303ec829ea12f9883e499bfd3fe9e52baf ] While creating qps, driver adds one extra entry to the sq size passed by the ULPs in order to avoid queue full condition. When ULPs creates QPs with max_qp_wr reported, driver creates QP with 1 more than the max_wqes supported by HW. Create QP fails in this case. To avoid this error, reduce 1 entry in max_qp_wqes and report it to the stack. 
Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Reviewed-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241217102649.1377704-2-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 9fcfe972758b7e097f378230dfe0ca524535eb6f) --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 420f8613bcd51..0f6bae009af1a 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -127,7 +127,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_qp_init_rd_atom = sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ? BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom; - attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr); + attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1; /* * 128 WQEs needs to be reserved for the HW (8916). Prevent * reporting the max number From f54f0cb38508f37db31bf58c3c4ea7be784575ca Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 18 Aug 2024 21:47:23 -0700 Subject: [PATCH 126/216] RDMA/bnxt_re: Add support for Variable WQE in Genp7 adapters [ Upstream commit de1d364c3815f9360a0945097ca2731950e914fa ] Variable size WQE means that each send Work Queue Entry to HW can use different WQE sizes as opposed to the static WQE size on the current devices. Set variable WQE mode for Gen P7 devices. Depth of the Queue will be a multiple of slot which is 16 bytes. The number of slots should be a multiple of 256 as per the HW requirement. Initialize the Software shadow queue to hold requests equal to the number of slots. Also, do not expose the variable size WQE capability until the last patch in the series. 
Link: https://patch.msgid.link/r/1724042847-1481-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Stable-dep-of: d5a38bf2f359 ("RDMA/bnxt_re: Disable use of reserved wqes") Signed-off-by: Sasha Levin (cherry picked from commit bb46a484a0c61fd0552e838753ed6f07739cac3a) --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 +++++--- drivers/infiniband/hw/bnxt_re/main.c | 21 +++++++++++---------- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 18 +++++++++--------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 14 +++++++++++--- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 7 +++++-- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 6 ++++++ 6 files changed, 47 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9e8f86f488019..540998ddbb445 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1154,6 +1154,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp /* Shadow QP SQ depth should be same as QP1 RQ depth */ qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; + qp->qplib_qp.sq.max_sw_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ qp->qplib_qp.sq.q_full_delta = 1; @@ -1165,6 +1166,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6); qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; + qp->qplib_qp.rq.max_sw_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ qp->qplib_qp.rq.q_full_delta = 1; @@ -1226,6 +1228,7 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, */ entries = bnxt_re_init_depth(init_attr->cap.max_recv_wr + 1, uctx); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); + rq->max_sw_wqe = rq->max_wqe; rq->q_full_delta 
= 0; rq->sg_info.pgsize = PAGE_SIZE; rq->sg_info.pgshft = PAGE_SHIFT; @@ -1285,6 +1288,7 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, 0 : BNXT_QPLIB_RESERVED_QP_WRS; entries = bnxt_re_init_depth(entries + diff + 1, uctx); sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); + sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true); sq->q_full_delta = diff + 1; /* * Reserving one slot for Phantom WQE. Application can @@ -2155,6 +2159,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, entries = bnxt_re_init_depth(qp_attr->cap.max_recv_wr, uctx); qp->qplib_qp.rq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); + qp->qplib_qp.rq.max_sw_wqe = qp->qplib_qp.rq.max_wqe; qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe - qp_attr->cap.max_recv_wr; qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge; @@ -4171,9 +4176,6 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) resp.cqe_sz = sizeof(struct cq_base); resp.max_cqd = dev_attr->max_cq_wqes; - resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE; - resp.mode = rdev->chip_ctx->modes.wqe_mode; - if (rdev->chip_ctx->modes.db_push) resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 0373d0e9db632..c7e51cc2ea268 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -128,13 +128,13 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) } } -static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) +static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *cctx; cctx = rdev->chip_ctx; - cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? - mode : BNXT_QPLIB_WQE_MODE_STATIC; + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? 
+ BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC; if (bnxt_re_hwrm_qcaps(rdev)) dev_err(rdev_to_dev(rdev), "Failed to query hwrm qcaps\n"); @@ -155,7 +155,7 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) kfree(chip_ctx); } -static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) +static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; @@ -177,7 +177,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) rdev->qplib_res.dattr = &rdev->dev_attr; rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); - bnxt_re_set_drv_mode(rdev, wqe_mode); + bnxt_re_set_drv_mode(rdev); bnxt_re_set_db_offset(rdev); rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); @@ -1440,7 +1440,7 @@ static void bnxt_re_worker(struct work_struct *work) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } -static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) +static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) { struct bnxt_re_ring_attr rattr = {}; struct bnxt_qplib_creq_ctx *creq; @@ -1458,7 +1458,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); + rc = bnxt_re_setup_chip_ctx(rdev); if (rc) { bnxt_unregister_dev(rdev->en_dev); clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); @@ -1609,7 +1609,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) return rc; } -static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode) +static int bnxt_re_add_device(struct auxiliary_device *adev) { struct bnxt_aux_priv *aux_priv = container_of(adev, struct bnxt_aux_priv, aux_dev); @@ -1626,7 +1626,7 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode) goto exit; } - rc = bnxt_re_dev_init(rdev, wqe_mode); + rc = bnxt_re_dev_init(rdev); if (rc) goto re_dev_dealloc; @@ -1756,7 
+1756,8 @@ static int bnxt_re_probe(struct auxiliary_device *adev, int rc; mutex_lock(&bnxt_re_mutex); - rc = bnxt_re_add_device(adev, BNXT_QPLIB_WQE_MODE_STATIC); + + rc = bnxt_re_add_device(adev); if (rc) { mutex_unlock(&bnxt_re_mutex); return rc; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 8997f359b58b3..2f85245d12853 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -807,13 +807,13 @@ static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) { int indx; - que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL); + que->swq = kcalloc(que->max_sw_wqe, sizeof(*que->swq), GFP_KERNEL); if (!que->swq) return -ENOMEM; que->swq_start = 0; - que->swq_last = que->max_wqe - 1; - for (indx = 0; indx < que->max_wqe; indx++) + que->swq_last = que->max_sw_wqe - 1; + for (indx = 0; indx < que->max_sw_wqe; indx++) que->swq[indx].next_idx = indx + 1; que->swq[que->swq_last].next_idx = 0; /* Make it circular */ que->swq_last = 0; @@ -849,7 +849,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); - hwq_attr.depth = bnxt_qplib_get_depth(sq); + hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, false); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -877,7 +877,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); - hwq_attr.depth = bnxt_qplib_get_depth(rq); + hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) @@ -1007,7 +1007,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = 
&sq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); - hwq_attr.depth = bnxt_qplib_get_depth(sq); + hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, true); hwq_attr.aux_stride = psn_sz; hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode) : 0; @@ -1049,7 +1049,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); - hwq_attr.depth = bnxt_qplib_get_depth(rq); + hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false); hwq_attr.aux_stride = 0; hwq_attr.aux_depth = 0; hwq_attr.type = HWQ_TYPE_QUEUE; @@ -2493,7 +2493,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } sq = &qp->sq; - cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe; + cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_sw_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, "%s: QP in Flush QP = %p\n", __func__, qp); @@ -2885,7 +2885,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx); if (cqe_cons == 0xFFFF) goto do_rq; - cqe_cons %= sq->max_wqe; + cqe_cons %= sq->max_sw_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 3a15ca7feb2b7..b64746d484d63 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -251,6 +251,7 @@ struct bnxt_qplib_q { struct bnxt_qplib_db_info dbinfo; struct bnxt_qplib_sg_info sg_info; u32 max_wqe; + u32 max_sw_wqe; u16 wqe_size; u16 q_full_delta; u16 max_sge; @@ -585,15 +586,22 @@ static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx) que->swq_start = que->swq[idx].next_idx; } -static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que) +static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que, u8 wqe_mode, bool is_sq) { - return 
(que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); + u32 slots; + + /* Queue depth is the number of slots. */ + slots = (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); + /* For variable WQE mode, need to align the slots to 256 */ + if (wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE && is_sq) + slots = ALIGN(slots, BNXT_VAR_MAX_SLOT_ALIGN); + return slots; } static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode) { return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? - que->max_wqe : bnxt_qplib_get_depth(que); + que->max_wqe : bnxt_qplib_get_depth(que, wqe_mode, true); } static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 0f6bae009af1a..a46df2a5ab334 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -95,11 +95,13 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_cmdqmsg msg = {}; struct creq_query_func_resp_sb *sb; struct bnxt_qplib_rcfw_sbuf sbuf; + struct bnxt_qplib_chip_ctx *cctx; struct cmdq_query_func req = {}; u8 *tqm_alloc; int i, rc; u32 temp; + cctx = rcfw->res->cctx; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_FUNC, sizeof(req)); @@ -133,8 +135,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * reporting the max number */ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; - attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx) ? - 6 : sb->max_sge; + + attr->max_qp_sges = cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE ? 
+ min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6; attr->max_cq = le32_to_cpu(sb->max_cq); attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 2f16f3db093ea..b91e6a85e75d9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -40,6 +40,7 @@ #ifndef __BNXT_QPLIB_SP_H__ #define __BNXT_QPLIB_SP_H__ +#include #define BNXT_QPLIB_RESERVED_QP_WRS 128 struct bnxt_qplib_dev_attr { @@ -352,4 +353,9 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, struct bnxt_qplib_cc_param *cc_param); +#define BNXT_VAR_MAX_WQE 4352 +#define BNXT_VAR_MAX_SLOT_ALIGN 256 +#define BNXT_VAR_MAX_SGE 13 +#define BNXT_RE_MAX_RQ_WQES 65536 + #endif /* __BNXT_QPLIB_SP_H__*/ From 4b5ca9db457285404c69c4ac5ff31e7f28beba2e Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 17 Dec 2024 15:56:46 +0530 Subject: [PATCH 127/216] RDMA/bnxt_re: Disable use of reserved wqes [ Upstream commit d5a38bf2f35979537c526acbc56bc435ed40685f ] Disabling the reserved wqes logic for Gen P5/P7 devices because this workaround is required only for legacy devices. 
Fixes: ecb53febfcad ("RDMA/bnxt_en: Enable RDMA driver support for 57500 chip") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241217102649.1377704-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 3ae9ee7ff3b28a29fe0b91f730580c0544254043) --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index a46df2a5ab334..577a6eaca4cee 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -130,11 +130,13 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ? BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom; attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1; - /* - * 128 WQEs needs to be reserved for the HW (8916). Prevent - * reporting the max number - */ - attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; + if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) { + /* + * 128 WQEs needs to be reserved for the HW (8916). Prevent + * reporting the max number on legacy devices + */ + attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; + } attr->max_qp_sges = cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE ? min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6; From e1b9ca9625bc050612ac6fa210a6adc51caae795 Mon Sep 17 00:00:00 2001 From: Damodharam Ammepalli Date: Tue, 17 Dec 2024 15:56:47 +0530 Subject: [PATCH 128/216] RDMA/bnxt_re: Add send queue size check for variable wqe [ Upstream commit d13be54dc18baee7a3e44349b80755a8c8205d3f ] For the fixed WQE case, HW supports 0xFFFF WQEs. For variable Size WQEs, HW treats this number as the 16 bytes slots. The maximum supported WQEs needs to be adjusted based on the number of slots. Set a maximum WQE limit for variable WQE scenario. 
Fixes: de1d364c3815 ("RDMA/bnxt_re: Add support for Variable WQE in Genp7 adapters") Reviewed-by: Kalesh AP Signed-off-by: Damodharam Ammepalli Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241217102649.1377704-4-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 2e719d89b9fad00d5a07c6cd399fd9f9d4d46202) --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 577a6eaca4cee..74c3f6b26c4d3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -138,6 +138,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; } + /* Adjust for max_qp_wqes for variable wqe */ + if (cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) + attr->max_qp_wqes = BNXT_VAR_MAX_WQE - 1; + attr->max_qp_sges = cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE ? min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6; attr->max_cq = le32_to_cpu(sb->max_cq); From d23f64bcf680e35dbc9ac16bdb2bc34c86d5bb15 Mon Sep 17 00:00:00 2001 From: Damodharam Ammepalli Date: Tue, 17 Dec 2024 15:56:48 +0530 Subject: [PATCH 129/216] RDMA/bnxt_re: Fix MSN table size for variable wqe mode [ Upstream commit bb839f3ace0fee532a0487b692cc4d868fccb7cf ] For variable size wqe mode, the MSN table size should be half the size of the SQ depth. Fixing this to avoid wrap around problems in the retransmission path. 
Fixes: de1d364c3815 ("RDMA/bnxt_re: Add support for Variable WQE in Genp7 adapters") Reviewed-by: Kashyap Desai Reviewed-by: Kalesh AP Signed-off-by: Damodharam Ammepalli Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241217102649.1377704-5-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit cd1547b49b2c882dd9e2d832b5afb3d8f18d02c2) --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 2f85245d12853..1355061d698d4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1013,7 +1013,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) : 0; /* Update msn tbl size */ if (qp->is_host_msn_tbl && psn_sz) { - hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + hwq_attr.aux_depth = + roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); + else + hwq_attr.aux_depth = + roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)) / 2; qp->msn_tbl_sz = hwq_attr.aux_depth; qp->msn = 0; } From 24de9b25641bc61a5fbfcf7b421a3350febb9d11 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 17 Dec 2024 15:56:49 +0530 Subject: [PATCH 130/216] RDMA/bnxt_re: Fix the locking while accessing the QP table [ Upstream commit 9272cba0ded71b5a2084da3004ec7806b8cb7fd2 ] QP table handling is synchronized with destroy QP and Async event from the HW. The same needs to be synchronized during create_qp also. Use the same lock in create_qp also. 
Fixes: 76d3ddff7153 ("RDMA/bnxt_re: synchronize the qp-handle table array") Fixes: f218d67ef004 ("RDMA/bnxt_re: Allow posting when QPs are in error") Fixes: 84cf229f4001 ("RDMA/bnxt_re: Fix the qp table indexing") Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241217102649.1377704-6-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit f28fa7625536f5c6e282cdc0b669ad863cda928f) --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 1355061d698d4..871a49315c880 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1161,9 +1161,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rq->dbinfo.db = qp->dpi->dbr; rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } + spin_lock_bh(&rcfw->tbl_lock); tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); rcfw->qp_tbl[tbl_indx].qp_id = qp->id; rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp; + spin_unlock_bh(&rcfw->tbl_lock); return 0; fail: From 1f6abe39ca312a1a4e8ce907d2776b9c34cc408d Mon Sep 17 00:00:00 2001 From: Stefan Ekenberg Date: Tue, 19 Nov 2024 08:40:29 +0100 Subject: [PATCH 131/216] drm/bridge: adv7511_audio: Update Audio InfoFrame properly [ Upstream commit 902806baf3c1e8383c1fe3ff0b6042b8cb5c2707 ] AUDIO_UPDATE bit (Bit 5 of MAIN register 0x4A) needs to be set to 1 while updating Audio InfoFrame information and then set to 0 when done. Otherwise partially updated Audio InfoFrames could be sent out. Two cases where this rule were not followed are fixed: - In adv7511_hdmi_hw_params() make sure AUDIO_UPDATE bit is updated before/after setting ADV7511_REG_AUDIO_INFOFRAME. - In audio_startup() use the correct register for clearing AUDIO_UPDATE bit. 
The problem with corrupted audio infoframes was discovered by letting an HDMI logic analyser check the output of ADV7535. Note that this patch replaces writing REG_GC(1) with REG_INFOFRAME_UPDATE. Bit 5 of REG_GC(1) is positioned within field GC_PP[3:0] and that field doesn't control audio infoframe and is read-only. My conclusion therefore was that the author of this code meant to clear bit 5 of REG_INFOFRAME_UPDATE from the very beginning. Tested-by: Biju Das Fixes: 53c515befe28 ("drm/bridge: adv7511: Add Audio support") Signed-off-by: Stefan Ekenberg Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20241119-adv7511-audio-info-frame-v4-1-4ae68e76c89c@axis.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin (cherry picked from commit fa7f96589f171ef3ab913f931c852264772fa8d6) --- drivers/gpu/drm/bridge/adv7511/adv7511_audio.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c index 61f4a38e7d2bf..8f786592143b6 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c @@ -153,7 +153,16 @@ static int adv7511_hdmi_hw_params(struct device *dev, void *data, ADV7511_AUDIO_CFG3_LEN_MASK, len); regmap_update_bits(adv7511->regmap, ADV7511_REG_I2C_FREQ_ID_CFG, ADV7511_I2C_FREQ_ID_CFG_RATE_MASK, rate << 4); - regmap_write(adv7511->regmap, 0x73, 0x1); + + /* send current Audio infoframe values while updating */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), BIT(5)); + + regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME(0), 0x1); + + /* use Audio infoframe updated info */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), 0); return 0; } @@ -184,8 +193,9 @@ static int audio_startup(struct device *dev, void *data) regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(0), BIT(7) | BIT(6),
BIT(7)); /* use Audio infoframe updated info */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(1), + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, BIT(5), 0); + /* enable SPDIF receiver */ if (adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF) regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG, From 9de5f251fe41d938b039731c17cd51a540e4f75b Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 17 Dec 2024 18:02:23 -0800 Subject: [PATCH 132/216] net: dsa: microchip: Fix KSZ9477 set_ageing_time function [ Upstream commit 262bfba8ab820641c8cfbbf03b86d6c00242c078 ] The aging count is not a simple 11-bit value but comprises a 3-bit multiplier and an 8-bit second count. The code tries to use the original multiplier which is 4 as the second count is still 300 seconds by default. Fixes: 2c119d9982b1 ("net: dsa: microchip: add the support for set_ageing_time") Signed-off-by: Tristram Ha Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241218020224.70590-2-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 7583dd5928b6e2390139d02a955037bc58d832f6) --- drivers/net/dsa/microchip/ksz9477.c | 47 +++++++++++++++++++------ drivers/net/dsa/microchip/ksz9477_reg.h | 4 +-- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index a7e8fcdf25768..59134d117846d 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 switch driver main logic * - * Copyright (C) 2017-2019 Microchip Technology Inc. + * Copyright (C) 2017-2024 Microchip Technology Inc. 
*/ #include @@ -916,26 +916,51 @@ void ksz9477_get_caps(struct ksz_device *dev, int port, int ksz9477_set_ageing_time(struct ksz_device *dev, unsigned int msecs) { u32 secs = msecs / 1000; - u8 value; - u8 data; + u8 data, mult, value; + u32 max_val; int ret; - value = FIELD_GET(SW_AGE_PERIOD_7_0_M, secs); +#define MAX_TIMER_VAL ((1 << 8) - 1) - ret = ksz_write8(dev, REG_SW_LUE_CTRL_3, value); - if (ret < 0) - return ret; + /* The aging timer comprises a 3-bit multiplier and an 8-bit second + * value. Either of them cannot be zero. The maximum timer is then + * 7 * 255 = 1785 seconds. + */ + if (!secs) + secs = 1; - data = FIELD_GET(SW_AGE_PERIOD_10_8_M, secs); + /* Return error if too large. */ + else if (secs > 7 * MAX_TIMER_VAL) + return -EINVAL; ret = ksz_read8(dev, REG_SW_LUE_CTRL_0, &value); if (ret < 0) return ret; - value &= ~SW_AGE_CNT_M; - value |= FIELD_PREP(SW_AGE_CNT_M, data); + /* Check whether there is need to update the multiplier. */ + mult = FIELD_GET(SW_AGE_CNT_M, value); + max_val = MAX_TIMER_VAL; + if (mult > 0) { + /* Try to use the same multiplier already in the register as + * the hardware default uses multiplier 4 and 75 seconds for + * 300 seconds. 
+ */ + max_val = DIV_ROUND_UP(secs, mult); + if (max_val > MAX_TIMER_VAL || max_val * mult != secs) + max_val = MAX_TIMER_VAL; + } + + data = DIV_ROUND_UP(secs, max_val); + if (mult != data) { + value &= ~SW_AGE_CNT_M; + value |= FIELD_PREP(SW_AGE_CNT_M, data); + ret = ksz_write8(dev, REG_SW_LUE_CTRL_0, value); + if (ret < 0) + return ret; + } - return ksz_write8(dev, REG_SW_LUE_CTRL_0, value); + value = DIV_ROUND_UP(secs, data); + return ksz_write8(dev, REG_SW_LUE_CTRL_3, value); } void ksz9477_port_queue_split(struct ksz_device *dev, int port) diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index a2ef4b18349c4..d0886ed984c57 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 register definitions * - * Copyright (C) 2017-2018 Microchip Technology Inc. + * Copyright (C) 2017-2024 Microchip Technology Inc. */ #ifndef __KSZ9477_REGS_H @@ -190,8 +190,6 @@ #define SW_VLAN_ENABLE BIT(7) #define SW_DROP_INVALID_VID BIT(6) #define SW_AGE_CNT_M GENMASK(5, 3) -#define SW_AGE_CNT_S 3 -#define SW_AGE_PERIOD_10_8_M GENMASK(10, 8) #define SW_RESV_MCAST_ENABLE BIT(2) #define SW_HASH_OPTION_M 0x03 #define SW_HASH_OPTION_CRC 1 From 6dd3c6655ca47f268074e39d45ea99be21947010 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 17 Dec 2024 18:02:24 -0800 Subject: [PATCH 133/216] net: dsa: microchip: Fix LAN937X set_ageing_time function [ Upstream commit bb9869043438af5b94230f94fb4c39206525d758 ] The aging count is not a simple 20-bit value but comprises a 3-bit multiplier and a 20-bit second time. The code tries to use the original multiplier which is 4 as the second count is still 300 seconds by default. As the 20-bit number is now too large for practical use there is an option to interpret it as microseconds instead of seconds. 
Fixes: 2c119d9982b1 ("net: dsa: microchip: add the support for set_ageing_time") Signed-off-by: Tristram Ha Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241218020224.70590-3-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit ca2a2cad4efbdcd7e8fe8a2a80e1aaa8849e8875) --- drivers/net/dsa/microchip/lan937x_main.c | 62 ++++++++++++++++++++++-- drivers/net/dsa/microchip/lan937x_reg.h | 9 ++-- 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/microchip/lan937x_main.c b/drivers/net/dsa/microchip/lan937x_main.c index b479a628b1ae5..dde37e61faa35 100644 --- a/drivers/net/dsa/microchip/lan937x_main.c +++ b/drivers/net/dsa/microchip/lan937x_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Microchip LAN937X switch driver main logic - * Copyright (C) 2019-2022 Microchip Technology Inc. + * Copyright (C) 2019-2024 Microchip Technology Inc. */ #include #include @@ -257,10 +257,66 @@ int lan937x_change_mtu(struct ksz_device *dev, int port, int new_mtu) int lan937x_set_ageing_time(struct ksz_device *dev, unsigned int msecs) { - u32 secs = msecs / 1000; - u32 value; + u8 data, mult, value8; + bool in_msec = false; + u32 max_val, value; + u32 secs = msecs; int ret; +#define MAX_TIMER_VAL ((1 << 20) - 1) + + /* The aging timer comprises a 3-bit multiplier and a 20-bit second + * value. Either of them cannot be zero. The maximum timer is then + * 7 * 1048575 = 7340025 seconds. As this value is too large for + * practical use it can be interpreted as microseconds, making the + * maximum timer 7340 seconds with finer control. This allows for + * maximum 122 minutes compared to 29 minutes in KSZ9477 switch. + */ + if (msecs % 1000) + in_msec = true; + else + secs /= 1000; + if (!secs) + secs = 1; + + /* Return error if too large. */ + else if (secs > 7 * MAX_TIMER_VAL) + return -EINVAL; + + /* Configure how to interpret the number value. 
*/ + ret = ksz_rmw8(dev, REG_SW_LUE_CTRL_2, SW_AGE_CNT_IN_MICROSEC, + in_msec ? SW_AGE_CNT_IN_MICROSEC : 0); + if (ret < 0) + return ret; + + ret = ksz_read8(dev, REG_SW_LUE_CTRL_0, &value8); + if (ret < 0) + return ret; + + /* Check whether there is need to update the multiplier. */ + mult = FIELD_GET(SW_AGE_CNT_M, value8); + max_val = MAX_TIMER_VAL; + if (mult > 0) { + /* Try to use the same multiplier already in the register as + * the hardware default uses multiplier 4 and 75 seconds for + * 300 seconds. + */ + max_val = DIV_ROUND_UP(secs, mult); + if (max_val > MAX_TIMER_VAL || max_val * mult != secs) + max_val = MAX_TIMER_VAL; + } + + data = DIV_ROUND_UP(secs, max_val); + if (mult != data) { + value8 &= ~SW_AGE_CNT_M; + value8 |= FIELD_PREP(SW_AGE_CNT_M, data); + ret = ksz_write8(dev, REG_SW_LUE_CTRL_0, value8); + if (ret < 0) + return ret; + } + + secs = DIV_ROUND_UP(secs, data); + value = FIELD_GET(SW_AGE_PERIOD_7_0_M, secs); ret = ksz_write8(dev, REG_SW_AGE_PERIOD__1, value); diff --git a/drivers/net/dsa/microchip/lan937x_reg.h b/drivers/net/dsa/microchip/lan937x_reg.h index 45b606b6429f6..b3e536e7c6869 100644 --- a/drivers/net/dsa/microchip/lan937x_reg.h +++ b/drivers/net/dsa/microchip/lan937x_reg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Microchip LAN937X switch register definitions - * Copyright (C) 2019-2021 Microchip Technology Inc. + * Copyright (C) 2019-2024 Microchip Technology Inc. 
*/ #ifndef __LAN937X_REG_H #define __LAN937X_REG_H @@ -48,8 +48,7 @@ #define SW_VLAN_ENABLE BIT(7) #define SW_DROP_INVALID_VID BIT(6) -#define SW_AGE_CNT_M 0x7 -#define SW_AGE_CNT_S 3 +#define SW_AGE_CNT_M GENMASK(5, 3) #define SW_RESV_MCAST_ENABLE BIT(2) #define REG_SW_LUE_CTRL_1 0x0311 @@ -62,6 +61,10 @@ #define SW_FAST_AGING BIT(1) #define SW_LINK_AUTO_AGING BIT(0) +#define REG_SW_LUE_CTRL_2 0x0312 + +#define SW_AGE_CNT_IN_MICROSEC BIT(7) + #define REG_SW_AGE_PERIOD__1 0x0313 #define SW_AGE_PERIOD_7_0_M GENMASK(7, 0) From ce941be4f42b545658aa72682cb175b943473dba Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Sat, 13 Jan 2024 16:59:30 +0800 Subject: [PATCH 134/216] RDMA/hns: Refactor mtr find [ Upstream commit a4ca341080758d847db155b97887bff6f84016a4 ] hns_roce_mtr_find() is a collection of multiple functions, and the return value is also difficult to understand, which is not conducive to modification and maintenance. Separate the function of obtaining MTR root BA from this function. And some adjustments have been made to improve readability.
Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20240113085935.2838701-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Stable-dep-of: 8673a6c2d9e4 ("RDMA/hns: Fix mapping error of zero-hop WQE buffer") Signed-off-by: Sasha Levin (cherry picked from commit 363f502cbfc035a49aea64cbba28a22d85ad25ff) --- drivers/infiniband/hw/hns/hns_roce_cq.c | 11 +-- drivers/infiniband/hw/hns/hns_roce_device.h | 7 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 102 ++++++++++---------- drivers/infiniband/hw/hns/hns_roce_mr.c | 86 +++++++++++------ 4 files changed, 121 insertions(+), 85 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 9b91731a62079..5e0d78f4e5454 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -133,14 +133,12 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct ib_device *ibdev = &hr_dev->ib_dev; u64 mtts[MTT_MIN_COUNT] = {}; - dma_addr_t dma_handle; int ret; - ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), - &dma_handle); - if (!ret) { + ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts)); + if (ret) { ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); - return -EINVAL; + return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ @@ -157,7 +155,8 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) goto err_put; } - ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle); + ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, + hns_roce_get_mtr_ba(&hr_cq->mtr)); if (ret) goto err_xa; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 21ef00fdb6563..a835368548e52 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ 
b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1129,8 +1129,13 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev); /* hns roce hw need current block and next block addr from mtt */ #define MTT_MIN_COUNT 2 +static inline dma_addr_t hns_roce_get_mtr_ba(struct hns_roce_mtr *mtr) +{ + return mtr->hem_cfg.root_ba; +} + int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); + u32 offset, u64 *mtt_buf, int mtt_max); int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, unsigned int page_shift, struct ib_udata *udata, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2824d390ec316..a396ba85cdce2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3181,21 +3181,22 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 }; struct ib_device *ibdev = &hr_dev->ib_dev; dma_addr_t pbl_ba; - int i, count; + int ret; + int i; - count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, - min_t(int, ARRAY_SIZE(pages), mr->npages), - &pbl_ba); - if (count < 1) { - ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n", - count); - return -ENOBUFS; + ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, + min_t(int, ARRAY_SIZE(pages), mr->npages)); + if (ret) { + ibdev_err(ibdev, "failed to find PBL mtr, ret = %d.\n", ret); + return ret; } /* Aligned to the hardware address access unit */ - for (i = 0; i < count; i++) + for (i = 0; i < ARRAY_SIZE(pages); i++) pages[i] >>= 6; + pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); + mpt_entry->pbl_size = cpu_to_le32(mr->npages); mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); @@ -3294,18 +3295,12 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev 
*hr_dev, static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr) { - struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); struct hns_roce_v2_mpt_entry *mpt_entry; - dma_addr_t pbl_ba = 0; mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); - if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) { - ibdev_err(ibdev, "failed to find frmr mtr.\n"); - return -ENOBUFS; - } - hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); hr_reg_write(mpt_entry, MPT_PD, mr->pd); @@ -4333,17 +4328,20 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, { u64 mtts[MTT_MIN_COUNT] = { 0 }; u64 wqe_sge_ba; - int count; + int ret; /* Search qp buf's mtts */ - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts, - MTT_MIN_COUNT, &wqe_sge_ba); - if (hr_qp->rq.wqe_cnt && count < 1) { + ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts, + MTT_MIN_COUNT); + if (hr_qp->rq.wqe_cnt && ret) { ibdev_err(&hr_dev->ib_dev, - "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn); - return -EINVAL; + "failed to find QP(0x%lx) RQ WQE buf, ret = %d.\n", + hr_qp->qpn, ret); + return ret; } + wqe_sge_ba = hns_roce_get_mtr_ba(&hr_qp->mtr); + context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; @@ -4407,23 +4405,23 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; - int count; + int ret; /* search qp buf's mtts */ - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); - if (count < 1) { - ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf.\n", - hr_qp->qpn); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset, + &sq_cur_blk, 1); + if (ret) { + ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n", + hr_qp->qpn, ret); + return ret; } if (hr_qp->sge.sge_cnt > 0) { - count = 
hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.offset, - &sge_cur_blk, 1, NULL); - if (count < 1) { - ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n", - hr_qp->qpn); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, + hr_qp->sge.offset, &sge_cur_blk, 1); + if (ret) { + ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n", + hr_qp->qpn, ret); + return ret; } } @@ -5550,18 +5548,20 @@ static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, struct ib_device *ibdev = srq->ibsrq.device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); u64 mtts_idx[MTT_MIN_COUNT] = {}; - dma_addr_t dma_handle_idx = 0; + dma_addr_t dma_handle_idx; int ret; /* Get physical address of idx que buf */ ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx, - ARRAY_SIZE(mtts_idx), &dma_handle_idx); - if (ret < 1) { + ARRAY_SIZE(mtts_idx)); + if (ret) { ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", ret); - return -ENOBUFS; + return ret; } + dma_handle_idx = hns_roce_get_mtr_ba(&idx_que->mtr); + hr_reg_write(ctx, SRQC_IDX_HOP_NUM, to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt)); @@ -5593,20 +5593,22 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); struct hns_roce_srq_context *ctx = mb_buf; u64 mtts_wqe[MTT_MIN_COUNT] = {}; - dma_addr_t dma_handle_wqe = 0; + dma_addr_t dma_handle_wqe; int ret; memset(ctx, 0, sizeof(*ctx)); /* Get the physical address of srq buf */ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, - ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); - if (ret < 1) { + ARRAY_SIZE(mtts_wqe)); + if (ret) { ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", ret); - return -ENOBUFS; + return ret; } + dma_handle_wqe = hns_roce_get_mtr_ba(&srq->buf_mtr); + hr_reg_write(ctx, SRQC_SRQ_ST, 1); hr_reg_write_bool(ctx, SRQC_SRQ_TYPE, srq->ibsrq.srq_type == IB_SRQT_XRC); @@ -6327,7 +6329,7 @@ static int config_eqc(struct 
hns_roce_dev *hr_dev, struct hns_roce_eq *eq, u64 eqe_ba[MTT_MIN_COUNT] = { 0 }; struct hns_roce_eq_context *eqc; u64 bt_ba = 0; - int count; + int ret; eqc = mb_buf; memset(eqc, 0, sizeof(struct hns_roce_eq_context)); @@ -6335,13 +6337,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, init_eq_config(hr_dev, eq); /* if not multi-hop, eqe buffer only use one trunk */ - count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT, - &bt_ba); - if (count < 1) { - dev_err(hr_dev->dev, "failed to find EQE mtr\n"); - return -ENOBUFS; + ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, + ARRAY_SIZE(eqe_ba)); + if (ret) { + dev_err(hr_dev->dev, "failed to find EQE mtr, ret = %d\n", ret); + return ret; } + bt_ba = hns_roce_get_mtr_ba(&eq->mtr); + hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID); hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num); hr_reg_write(eqc, EQC_OVER_IGNORE, eq->over_ignore); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7f29a55d378f0..7b59b95f87c29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -802,47 +802,53 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return ret; } -int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) +static int hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg, + u32 start_index, u64 *mtt_buf, + int mtt_cnt) { - struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; - int mtt_count, left; - u32 start_index; + int mtt_count; int total = 0; - __le64 *mtts; u32 npage; u64 addr; - if (!mtt_buf || mtt_max < 1) - goto done; - - /* no mtt memory in direct mode, so just return the buffer address */ - if (cfg->is_direct) { - start_index = offset >> HNS_HW_PAGE_SHIFT; - for (mtt_count = 0; mtt_count < cfg->region_count && - total < mtt_max; mtt_count++) { - npage = 
cfg->region[mtt_count].offset; - if (npage < start_index) - continue; + if (mtt_cnt > cfg->region_count) + return -EINVAL; - addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); - mtt_buf[total] = addr; + for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt; + mtt_count++) { + npage = cfg->region[mtt_count].offset; + if (npage < start_index) + continue; - total++; - } + addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); + mtt_buf[total] = addr; - goto done; + total++; } - start_index = offset >> cfg->buf_pg_shift; - left = mtt_max; + if (!total) + return -ENOENT; + + return 0; +} + +static int hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr, u32 start_index, + u64 *mtt_buf, int mtt_cnt) +{ + int left = mtt_cnt; + int total = 0; + int mtt_count; + __le64 *mtts; + u32 npage; + while (left > 0) { mtt_count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, start_index + total, &mtt_count); if (!mtts || !mtt_count) - goto done; + break; npage = min(mtt_count, left); left -= npage; @@ -850,11 +856,33 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]); } -done: - if (base_addr) - *base_addr = cfg->root_ba; + if (!total) + return -ENOENT; + + return 0; +} + +int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + u32 offset, u64 *mtt_buf, int mtt_max) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + u32 start_index; + int ret; + + if (!mtt_buf || mtt_max < 1) + return -EINVAL; - return total; + /* no mtt memory in direct mode, so just return the buffer address */ + if (cfg->is_direct) { + start_index = offset >> HNS_HW_PAGE_SHIFT; + ret = hns_roce_get_direct_addr_mtt(cfg, start_index, + mtt_buf, mtt_max); + } else { + start_index = offset >> cfg->buf_pg_shift; + ret = hns_roce_get_mhop_mtt(hr_dev, mtr, start_index, + mtt_buf, mtt_max); + } + return ret; } static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, 
From 18eb723e29f6d9b621105843c5c1c58360833d21 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 12 Apr 2024 17:16:08 +0800 Subject: [PATCH 135/216] RDMA/hns: Remove unused parameters and variables [ Upstream commit f4caa864af84f801a5821ea2ba6c1cc46f8252c1 ] Remove unused parameters and variables. Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20240412091616.370789-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Stable-dep-of: 8673a6c2d9e4 ("RDMA/hns: Fix mapping error of zero-hop WQE buffer") Signed-off-by: Sasha Levin (cherry picked from commit 2049fb6c8bd75a93df833a06918e087ff2bc8a91) --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 3 +-- drivers/infiniband/hw/hns/hns_roce_device.h | 5 ++--- drivers/infiniband/hw/hns/hns_roce_hem.c | 13 +++++-------- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 20 +++++++------------- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 4 +--- drivers/infiniband/hw/hns/hns_roce_srq.c | 4 ++-- 7 files changed, 20 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 11a78ceae5689..950c133d4220e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -153,8 +153,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, return total; } -int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, struct ib_umem *umem, +int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem, unsigned int page_shift) { struct ib_block_iter biter; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a835368548e52..03b6546f63cdc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -892,8 +892,7 @@ struct hns_roce_hw { int 
(*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, void *mb_buf); - int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, - struct hns_roce_mr *mr); + int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr); int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, @@ -1193,7 +1192,7 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, struct hns_roce_buf *buf, unsigned int page_shift); -int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, +int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem, unsigned int page_shift); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 0ab514c49d5e6..d3fabf64e390f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1043,15 +1043,13 @@ static void hem_list_free_all(struct hns_roce_dev *hr_dev, } } -static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr, - u64 table_addr) +static void hem_list_link_bt(void *base_addr, u64 table_addr) { *(u64 *)(base_addr) = table_addr; } /* assign L0 table address to hem from root bt */ -static void hem_list_assign_bt(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, void *cpu_addr, +static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr, u64 phy_addr) { hem->addr = cpu_addr; @@ -1222,8 +1220,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, if (level > 1) { pre = hem_ptrs[level - 1]; step = (cur->start - pre->start) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, pre->addr + step, - cur->dma_addr); + hem_list_link_bt(pre->addr + step, cur->dma_addr); } } @@ -1281,7 +1278,7 @@ static int alloc_fake_root_bt(struct 
hns_roce_dev *hr_dev, void *cpu_base, if (!hem) return -ENOMEM; - hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base); + hem_list_assign_bt(hem, cpu_base, phy_base); list_add(&hem->list, branch_head); list_add(&hem->sibling, leaf_head); @@ -1304,7 +1301,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, /* if exist mid bt, link L1 to L0 */ list_for_each_entry_safe(hem, temp_hem, branch_head, list) { offset = (hem->start - r->offset) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); + hem_list_link_bt(cpu_base + offset, hem->dma_addr); total++; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a396ba85cdce2..aed9c403f3be4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3292,8 +3292,7 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, - void *mb_buf, struct hns_roce_mr *mr) +static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) { dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); struct hns_roce_v2_mpt_entry *mpt_entry; @@ -4208,8 +4207,7 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, } static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, - struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) + struct hns_roce_v2_qp_context *context) { hr_reg_write(context, QPC_SGE_SHIFT, to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, @@ -4231,7 +4229,6 @@ static inline int get_pdn(struct ib_pd *ib_pd) } static void modify_qp_reset_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4250,7 +4247,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs)); - set_qpc_wqe_cnt(hr_qp, 
context, qpc_mask); + set_qpc_wqe_cnt(hr_qp, context); /* No VLAN need to set 0xFFF */ hr_reg_write(context, QPC_VLAN_ID, 0xfff); @@ -4291,7 +4288,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, } static void modify_qp_init_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4612,8 +4608,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return 0; } -static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, +static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4982,15 +4977,14 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, hr_dev->caps.qpc_sz); - modify_qp_reset_to_init(ibqp, attr, context, qpc_mask); + modify_qp_reset_to_init(ibqp, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - modify_qp_init_to_init(ibqp, attr, context, qpc_mask); + modify_qp_init_to_init(ibqp, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, qpc_mask, udata); } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { - ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context, - qpc_mask); + ret = modify_qp_rtr_to_rts(ibqp, attr_mask, context, qpc_mask); } return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7b59b95f87c29..acdcd5c9f42f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -154,7 +154,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, if (mr->type != MR_TYPE_FRMR) ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr); else - ret = hr_dev->hw->frmr_write_mtpt(hr_dev, 
mailbox->buf, mr); + ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr); if (ret) { dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; @@ -714,7 +714,7 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, + npage = hns_roce_get_umem_bufs(pages, page_count, mtr->umem, page_shift); else npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 88a4777d29f8b..97d79c8d5cd06 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1075,7 +1075,6 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, - struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct hns_roce_qp *hr_qp) @@ -1229,7 +1228,6 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, struct ib_device *ibdev = qp->device; struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); struct hns_roce_qp *hr_qp = to_hr_qp(qp); - struct ib_pd *pd = qp->pd; int ret; ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); @@ -1244,7 +1242,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; } - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); + ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp); if (ret) ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 652508b660a06..80fcb1b0e8fdc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -249,7 +249,7 @@ static void free_srq_wqe_buf(struct hns_roce_dev 
*hr_dev, hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); } -static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +static int alloc_srq_wrid(struct hns_roce_srq *srq) { srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) @@ -365,7 +365,7 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, goto err_idx; if (!udata) { - ret = alloc_srq_wrid(hr_dev, srq); + ret = alloc_srq_wrid(srq); if (ret) goto err_wqe_buf; } From 5a4d75d91c4cb9fbfd506142d21f9258137e81c9 Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Fri, 20 Dec 2024 13:52:46 +0800 Subject: [PATCH 136/216] RDMA/hns: Fix mapping error of zero-hop WQE buffer [ Upstream commit 8673a6c2d9e483dfeeef83a1f06f59e05636f4d1 ] Due to HW limitation, the three region of WQE buffer must be mapped and set to HW in a fixed order: SQ buffer, SGE buffer, and RQ buffer. Currently when one region is zero-hop while the other two are not, the zero-hop region will not be mapped. This violate the limitation above and leads to address error. 
Fixes: 38389eaa4db1 ("RDMA/hns: Add mtr support for mixed multihop addressing") Signed-off-by: wenglianfa Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20241220055249.146943-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 2746888be48cb95248bce8e151080e514265c41a) --- drivers/infiniband/hw/hns/hns_roce_hem.c | 43 ++++++++++++++++-------- drivers/infiniband/hw/hns/hns_roce_mr.c | 5 --- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index d3fabf64e390f..51ab6041ca91b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -986,6 +986,7 @@ struct hns_roce_hem_item { size_t count; /* max ba numbers */ int start; /* start buf offset in this hem */ int end; /* end buf offset in this hem */ + bool exist_bt; }; /* All HEM items are linked in a tree structure */ @@ -1014,6 +1015,7 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } } + hem->exist_bt = exist_bt; hem->count = count; hem->start = start; hem->end = end; @@ -1024,22 +1026,22 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } static void hem_list_free_item(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, bool exist_bt) + struct hns_roce_hem_item *hem) { - if (exist_bt) + if (hem->exist_bt) dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN, hem->addr, hem->dma_addr); kfree(hem); } static void hem_list_free_all(struct hns_roce_dev *hr_dev, - struct list_head *head, bool exist_bt) + struct list_head *head) { struct hns_roce_hem_item *hem, *temp_hem; list_for_each_entry_safe(hem, temp_hem, head, list) { list_del(&hem->list); - hem_list_free_item(hr_dev, hem, exist_bt); + hem_list_free_item(hr_dev, hem); } } @@ -1139,6 +1141,10 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, 
for (i = 0; i < region_cnt; i++) { r = (struct hns_roce_buf_region *)&regions[i]; + /* when r->hopnum = 0, the region should not occupy root_ba. */ + if (!r->hopnum) + continue; + if (r->hopnum > 1) { step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step > 0) @@ -1232,7 +1238,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, err_exit: for (level = 1; level < hopnum; level++) - hem_list_free_all(hr_dev, &temp_list[level], true); + hem_list_free_all(hr_dev, &temp_list[level]); return ret; } @@ -1273,16 +1279,26 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, { struct hns_roce_hem_item *hem; + /* This is on the has_mtt branch, if r->hopnum + * is 0, there is no root_ba to reuse for the + * region's fake hem, so a dma_alloc request is + * necessary here. + */ hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1, - r->count, false); + r->count, !r->hopnum); if (!hem) return -ENOMEM; - hem_list_assign_bt(hem, cpu_base, phy_base); + /* The root_ba can be reused only when r->hopnum > 0. */ + if (r->hopnum) + hem_list_assign_bt(hem, cpu_base, phy_base); list_add(&hem->list, branch_head); list_add(&hem->sibling, leaf_head); - return r->count; + /* If r->hopnum == 0, 0 is returned, + * so that the root_bt entry is not occupied. + */ + return r->hopnum ?
r->count : 0; } static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, @@ -1326,7 +1342,7 @@ setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, return -ENOMEM; total = 0; - for (i = 0; i < region_cnt && total < max_ba_num; i++) { + for (i = 0; i < region_cnt && total <= max_ba_num; i++) { r = &regions[i]; if (!r->count) continue; @@ -1392,9 +1408,9 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, region_cnt); if (ret) { for (i = 0; i < region_cnt; i++) - hem_list_free_all(hr_dev, &head.branch[i], false); + hem_list_free_all(hr_dev, &head.branch[i]); - hem_list_free_all(hr_dev, &head.root, true); + hem_list_free_all(hr_dev, &head.root); } return ret; @@ -1457,10 +1473,9 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) - hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j], - j != 0); + hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j]); - hem_list_free_all(hr_dev, &hem_list->root_bt, true); + hem_list_free_all(hr_dev, &hem_list->root_bt); INIT_LIST_HEAD(&hem_list->btm_bt); hem_list->root_ba = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index acdcd5c9f42f8..408ef2a961492 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -767,11 +767,6 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; - /* if hopnum is 0, no need to map pages in this region */ - if (!r->hopnum) { - mapped_cnt += r->count; - continue; - } if (r->offset + r->count > page_cnt) { ret = -EINVAL; From 02c5772d8099f65030c18ed5ab344dc06b5f4e53 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 20 Dec 2024 13:52:48 +0800 Subject: [PATCH 137/216] RDMA/hns: Fix warning storm caused by invalid
input in IO path [ Upstream commit fa5c4ba8cdbfd2c2d6422e001311c8213283ebbf ] WARN_ON() is called in the IO path. And it could lead to a warning storm. Use WARN_ON_ONCE() instead of WARN_ON(). Fixes: 12542f1de179 ("RDMA/hns: Refactor process about opcode in post_send()") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20241220055249.146943-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit be4293e108e20bc4af3ef27dcb1aac65c764060d) --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index aed9c403f3be4..c9c9be1224717 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -471,7 +471,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, valid_num_sge = calc_wr_sge_num(wr, &msg_len); ret = set_ud_opcode(ud_sq_wqe, wr); - if (WARN_ON(ret)) + if (WARN_ON_ONCE(ret)) return ret; ud_sq_wqe->msg_len = cpu_to_le32(msg_len); @@ -575,7 +575,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, rc_sq_wqe->msg_len = cpu_to_le32(msg_len); ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr); - if (WARN_ON(ret)) + if (WARN_ON_ONCE(ret)) return ret; hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO, From 2565b37bd9b148cb236e56f1824281214727a24b Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 20 Dec 2024 13:52:49 +0800 Subject: [PATCH 138/216] RDMA/hns: Fix missing flush CQE for DWQE [ Upstream commit e3debdd48423d3d75b9d366399228d7225d902cd ] Flush CQE handler has not been called if QP state gets into errored mode in DWQE path. So, the new added outstanding WQEs will never be flushed. 
It leads to a hung task timeout when using NFS over RDMA: __switch_to+0x7c/0xd0 __schedule+0x350/0x750 schedule+0x50/0xf0 schedule_timeout+0x2c8/0x340 wait_for_common+0xf4/0x2b0 wait_for_completion+0x20/0x40 __ib_drain_sq+0x140/0x1d0 [ib_core] ib_drain_sq+0x98/0xb0 [ib_core] rpcrdma_xprt_disconnect+0x68/0x270 [rpcrdma] xprt_rdma_close+0x20/0x60 [rpcrdma] xprt_autoclose+0x64/0x1cc [sunrpc] process_one_work+0x1d8/0x4e0 worker_thread+0x154/0x420 kthread+0x108/0x150 ret_from_fork+0x10/0x18 Fixes: 01584a5edcc4 ("RDMA/hns: Add support of direct wqe") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20241220055249.146943-5-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 48f63e4e64a5122aa2b9b648631be1589b85e248) --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c9c9be1224717..aded0a7f42838 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -673,6 +673,10 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, #define HNS_ROCE_SL_SHIFT 2 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + if (unlikely(qp->state == IB_QPS_ERR)) { + flush_cqe(hr_dev, qp); + return; + } /* All kinds of DirectWQE have the same header field layout */ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG); hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl); From 52814a1e2ae073856d7e1c751698bd9701c6592f Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 12 Dec 2023 16:07:36 -0600 Subject: [PATCH 139/216] net: stmmac: don't create a MDIO bus if unnecessary [ Upstream commit f3c2caacee824ce4a331cdafb0b8dc8e987f105e ] Currently a MDIO bus is created if the devicetree description is either: 1. Not fixed-link 2. 
fixed-link but contains a MDIO bus as well The "1" case above isn't always accurate. If there's a phy-handle, it could be referencing a phy on another MDIO controller's bus[1]. In this case, where the MDIO bus is not described at all, currently stmmac will make a MDIO bus and scan its address space to discover phys (of which there are none). This process takes time scanning a bus that is known to be empty, delaying time to complete probe. There are also a lot of upstream devicetrees[2] that expect a MDIO bus to be created, scanned for phys, and the first one found connected to the MAC. This case can be inferred from the platform description by not having a phy-handle && not being fixed-link. This hits case "1" in the current driver's logic, and must be handled in any logic change here since it is a valid legacy dt-binding. Let's improve the logic to create a MDIO bus if either: - Devicetree contains a MDIO bus - !fixed-link && !phy-handle (legacy handling) This way the case where no MDIO bus should be made is handled, as well as retaining backwards compatibility with the valid cases. Below devicetree snippets can be found that explain some of the cases above more concretely. Here's[0] a devicetree example where the MAC is both fixed-link and driving a switch on MDIO (case "2" above). 
This needs a MDIO bus to be created: &fec1 { phy-mode = "rmii"; fixed-link { speed = <100>; full-duplex; }; mdio1: mdio { switch0: switch0@0 { compatible = "marvell,mv88e6190"; pinctrl-0 = <&pinctrl_gpio_switch0>; }; }; }; Here's[1] an example where there is no MDIO bus or fixed-link for the ethernet1 MAC, so no MDIO bus should be created since ethernet0 is the MDIO master for ethernet1's phy: &ethernet0 { phy-mode = "sgmii"; phy-handle = <&sgmii_phy0>; mdio { compatible = "snps,dwmac-mdio"; sgmii_phy0: phy@8 { compatible = "ethernet-phy-id0141.0dd4"; reg = <0x8>; device_type = "ethernet-phy"; }; sgmii_phy1: phy@a { compatible = "ethernet-phy-id0141.0dd4"; reg = <0xa>; device_type = "ethernet-phy"; }; }; }; &ethernet1 { phy-mode = "sgmii"; phy-handle = <&sgmii_phy1>; }; Finally there's descriptions like this[2] which don't describe the MDIO bus but expect it to be created and the whole address space scanned for a phy since there's no phy-handle or fixed-link described: &gmac { phy-supply = <&vcc_lan>; phy-mode = "rmii"; snps,reset-gpio = <&gpio3 RK_PB4 GPIO_ACTIVE_HIGH>; snps,reset-active-low; snps,reset-delays-us = <0 10000 1000000>; }; [0] https://elixir.bootlin.com/linux/v6.5-rc5/source/arch/arm/boot/dts/nxp/vf/vf610-zii-ssmb-dtu.dts [1] https://elixir.bootlin.com/linux/v6.6-rc5/source/arch/arm64/boot/dts/qcom/sa8775p-ride.dts [2] https://elixir.bootlin.com/linux/v6.6-rc5/source/arch/arm64/boot/dts/rockchip/rk3368-r88.dts#L164 Reviewed-by: Serge Semin Co-developed-by: Bartosz Golaszewski Signed-off-by: Bartosz Golaszewski Signed-off-by: Andrew Halaney Signed-off-by: David S.
Miller Stable-dep-of: 2b6ffcd7873b ("net: stmmac: restructure the error path of stmmac_probe_config_dt()") Signed-off-by: Sasha Levin (cherry picked from commit 6d01d9f66ae147dbbac4fa5043d9283bdf36b956) --- .../ethernet/stmicro/stmmac/stmmac_platform.c | 91 +++++++++++-------- 1 file changed, 54 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index b4fdd40be63cb..d73b2c17cc6c8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -296,62 +296,80 @@ static int stmmac_mtl_setup(struct platform_device *pdev, } /** - * stmmac_dt_phy - parse device-tree driver parameters to allocate PHY resources - * @plat: driver data platform structure - * @np: device tree node - * @dev: device pointer - * Description: - * The mdio bus will be allocated in case of a phy transceiver is on board; - * it will be NULL if the fixed-link is configured. - * If there is the "snps,dwmac-mdio" sub-node the mdio will be allocated - * in any case (for DSA, mdio must be registered even if fixed-link). - * The table below sums the supported configurations: - * ------------------------------- - * snps,phy-addr | Y - * ------------------------------- - * phy-handle | Y - * ------------------------------- - * fixed-link | N - * ------------------------------- - * snps,dwmac-mdio | - * even if | Y - * fixed-link | - * ------------------------------- + * stmmac_of_get_mdio() - Gets the MDIO bus from the devicetree. + * @np: devicetree node * - * It returns 0 in case of success otherwise -ENODEV. + * The MDIO bus will be searched for in the following ways: + * 1. The compatible is "snps,dwc-qos-ethernet-4.10" && a "mdio" named + * child node exists + * 2. 
A child node with the "snps,dwmac-mdio" compatible is present + * + * Return: The MDIO node if present otherwise NULL */ -static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, - struct device_node *np, struct device *dev) +static struct device_node *stmmac_of_get_mdio(struct device_node *np) { - bool mdio = !of_phy_is_fixed_link(np); static const struct of_device_id need_mdio_ids[] = { { .compatible = "snps,dwc-qos-ethernet-4.10" }, {}, }; + struct device_node *mdio_node = NULL; if (of_match_node(need_mdio_ids, np)) { - plat->mdio_node = of_get_child_by_name(np, "mdio"); + mdio_node = of_get_child_by_name(np, "mdio"); } else { /** * If snps,dwmac-mdio is passed from DT, always register * the MDIO */ - for_each_child_of_node(np, plat->mdio_node) { - if (of_device_is_compatible(plat->mdio_node, + for_each_child_of_node(np, mdio_node) { + if (of_device_is_compatible(mdio_node, "snps,dwmac-mdio")) break; } } - if (plat->mdio_node) { + return mdio_node; +} + +/** + * stmmac_mdio_setup() - Populate platform related MDIO structures. + * @plat: driver data platform structure + * @np: devicetree node + * @dev: device pointer + * + * This searches for MDIO information from the devicetree. + * If an MDIO node is found, it's assigned to plat->mdio_node and + * plat->mdio_bus_data is allocated. + * If no connection can be determined, just plat->mdio_bus_data is allocated + * to indicate a bus should be created and scanned for a phy. + * If it's determined there's no MDIO bus needed, both are left NULL. + * + * This expects that plat->phy_node has already been searched for. + * + * Return: 0 on success, errno otherwise. 
+ */ +static int stmmac_mdio_setup(struct plat_stmmacenet_data *plat, + struct device_node *np, struct device *dev) +{ + bool legacy_mdio; + + plat->mdio_node = stmmac_of_get_mdio(np); + if (plat->mdio_node) dev_dbg(dev, "Found MDIO subnode\n"); - mdio = true; - } - if (mdio) { - plat->mdio_bus_data = - devm_kzalloc(dev, sizeof(struct stmmac_mdio_bus_data), - GFP_KERNEL); + /* Legacy devicetrees allowed for no MDIO bus description and expect + * the bus to be scanned for devices. If there's no phy or fixed-link + * described assume this is the case since there must be something + * connected to the MAC. + */ + legacy_mdio = !of_phy_is_fixed_link(np) && !plat->phy_node; + if (legacy_mdio) + dev_info(dev, "Deprecated MDIO bus assumption used\n"); + + if (plat->mdio_node || legacy_mdio) { + plat->mdio_bus_data = devm_kzalloc(dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); if (!plat->mdio_bus_data) return -ENOMEM; @@ -455,8 +473,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - /* To Configure PHY by using all device-tree supported properties */ - rc = stmmac_dt_phy(plat, np, &pdev->dev); + rc = stmmac_mdio_setup(plat, np, &pdev->dev); if (rc) return ERR_PTR(rc); From 32d3f0d669215c2ae3d244cf9511804c053a8fb3 Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Thu, 19 Dec 2024 11:41:19 +0900 Subject: [PATCH 140/216] net: stmmac: restructure the error path of stmmac_probe_config_dt() [ Upstream commit 2b6ffcd7873b7e8a62c3e15a6f305bfc747c466b ] Current implementation of stmmac_probe_config_dt() does not release the OF node reference obtained by of_parse_phandle() in some error paths. The problem is that some error paths call stmmac_remove_config_dt() to clean up but others use and unwind ladder. These two types of error handling have not kept in sync and have been a recurring source of bugs. 
Re-write the error handling in stmmac_probe_config_dt() to use an unwind ladder. Consequently, stmmac_remove_config_dt() is not needed anymore, thus remove it. This bug was found by an experimental verification tool that I am developing. Fixes: 4838a5405028 ("net: stmmac: Fix wrapper drivers not detecting PHY") Signed-off-by: Joe Hattori Link: https://patch.msgid.link/20241219024119.2017012-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit c6870f86bde6803693c340e905ea26c544786d1a) --- .../ethernet/stmicro/stmmac/stmmac_platform.c | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index d73b2c17cc6c8..4d570efd9d4bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -474,8 +474,10 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); rc = stmmac_mdio_setup(plat, np, &pdev->dev); - if (rc) - return ERR_PTR(rc); + if (rc) { + ret = ERR_PTR(rc); + goto error_put_phy; + } of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size); @@ -564,8 +566,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), GFP_KERNEL); if (!dma_cfg) { - stmmac_remove_config_dt(pdev, plat); - return ERR_PTR(-ENOMEM); + ret = ERR_PTR(-ENOMEM); + goto error_put_mdio; } plat->dma_cfg = dma_cfg; @@ -593,8 +595,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) rc = stmmac_mtl_setup(pdev, plat); if (rc) { - stmmac_remove_config_dt(pdev, plat); - return ERR_PTR(rc); + ret = ERR_PTR(rc); + goto error_put_mdio; } /* clock setup */ @@ -646,6 +648,10 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) clk_disable_unprepare(plat->pclk); error_pclk_get: 
clk_disable_unprepare(plat->stmmac_clk); +error_put_mdio: + of_node_put(plat->mdio_node); +error_put_phy: + of_node_put(plat->phy_node); return ret; } @@ -654,16 +660,17 @@ static void devm_stmmac_remove_config_dt(void *data) { struct plat_stmmacenet_data *plat = data; - /* Platform data argument is unused */ - stmmac_remove_config_dt(NULL, plat); + clk_disable_unprepare(plat->stmmac_clk); + clk_disable_unprepare(plat->pclk); + of_node_put(plat->mdio_node); + of_node_put(plat->phy_node); } /** * devm_stmmac_probe_config_dt * @pdev: platform_device structure * @mac: MAC address to use - * Description: Devres variant of stmmac_probe_config_dt(). Does not require - * the user to call stmmac_remove_config_dt() at driver detach. + * Description: Devres variant of stmmac_probe_config_dt(). */ struct plat_stmmacenet_data * devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) From 3afc0c0f74208f3f82e53f0013a5f498fc94e905 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 19 Dec 2024 15:28:59 +0800 Subject: [PATCH 141/216] net: fix memory leak in tcp_conn_request() [ Upstream commit 4f4aa4aa28142d53f8b06585c478476cfe325cfc ] If inet_csk_reqsk_queue_hash_add() return false, tcp_conn_request() will return without free the dst memory, which allocated in af_ops->route_req. Here is the kmemleak stack: unreferenced object 0xffff8881198631c0 (size 240): comm "softirq", pid 0, jiffies 4299266571 (age 1802.392s) hex dump (first 32 bytes): 00 10 9b 03 81 88 ff ff 80 98 da bc ff ff ff ff ................ 81 55 18 bb ff ff ff ff 00 00 00 00 00 00 00 00 .U.............. 
backtrace: [] kmem_cache_alloc+0x60c/0xa80 [] dst_alloc+0x55/0x250 [] rt_dst_alloc+0x46/0x1d0 [] __mkroute_output+0x29a/0xa50 [] ip_route_output_key_hash+0x10b/0x240 [] ip_route_output_flow+0x1d/0x90 [] inet_csk_route_req+0x2c5/0x500 [] tcp_conn_request+0x691/0x12c0 [] tcp_rcv_state_process+0x3c8/0x11b0 [] tcp_v4_do_rcv+0x156/0x3b0 [] tcp_v4_rcv+0x1cf8/0x1d80 [] ip_protocol_deliver_rcu+0xf6/0x360 [] ip_local_deliver_finish+0xe6/0x1e0 [] ip_local_deliver+0xee/0x360 [] ip_rcv+0xad/0x2f0 [] __netif_receive_skb_one_core+0x123/0x140 Call dst_release() to free the dst memory when inet_csk_reqsk_queue_hash_add() return false in tcp_conn_request(). Fixes: ff46e3b44219 ("Fix race for duplicate reqsk on identical SYN") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20241219072859.3783576-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 2af69905180b3fea12f9c1db374b153a06977021) --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fb053942dba2a..f6a213bae5ccc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7192,6 +7192,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, req->timeout))) { reqsk_free(req); + dst_release(dst); return 0; } From 6e91263a82d6f007b7de7b65e00645b36f7737b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:34 +0000 Subject: [PATCH 142/216] ip_tunnel: annotate data-races around t->parms.link [ Upstream commit f694eee9e1c00d6ca06c5e59c04e3b6ff7d64aa9 ] t->parms.link is read locklessly, annotate these reads and opposite writes accordingly. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Stable-dep-of: b5a7b661a073 ("net: Fix netns for ip_tunnel_init_flow()") Signed-off-by: Sasha Levin (cherry picked from commit 77b1e00fe97e12243616fbb905e36ec349d3b09e) --- net/ipv4/ip_tunnel.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 72b2d68ef4da5..0f5cfe3caa2ed 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -102,10 +102,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (!ip_tunnel_key_match(&t->parms, flags, key)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else - cand = t; + cand = t; } hlist_for_each_entry_rcu(t, head, hash_node) { @@ -117,9 +116,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (!ip_tunnel_key_match(&t->parms, flags, key)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else if (!cand) + if (!cand) cand = t; } @@ -137,9 +136,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (!ip_tunnel_key_match(&t->parms, flags, key)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else if (!cand) + if (!cand) cand = t; } @@ -150,9 +149,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, !(t->dev->flags & IFF_UP)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else if (!cand) + if (!cand) cand = t; } @@ -221,7 +220,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - link == t->parms.link && + link == READ_ONCE(t->parms.link) && type == t->dev->type && ip_tunnel_key_match(&t->parms, flags, key)) break; @@ -774,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, 
tunnel->parms.o_key, RT_TOS(tos), - dev_net(dev), tunnel->parms.link, + dev_net(dev), READ_ONCE(tunnel->parms.link), tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) @@ -894,7 +893,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (t->parms.link != p->link || t->fwmark != fwmark) { int mtu; - t->parms.link = p->link; + WRITE_ONCE(t->parms.link, p->link); t->fwmark = fwmark; mtu = ip_tunnel_bind_dev(dev); if (set_mtu) @@ -1084,9 +1083,9 @@ EXPORT_SYMBOL(ip_tunnel_get_link_net); int ip_tunnel_get_iflink(const struct net_device *dev) { - struct ip_tunnel *tunnel = netdev_priv(dev); + const struct ip_tunnel *tunnel = netdev_priv(dev); - return tunnel->parms.link; + return READ_ONCE(tunnel->parms.link); } EXPORT_SYMBOL(ip_tunnel_get_iflink); From 97414d4b99918855d16c420d3873082bcfd64abd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 5 Sep 2024 19:51:33 +0300 Subject: [PATCH 143/216] ipv4: ip_tunnel: Unmask upper DSCP bits in ip_tunnel_bind_dev() [ Upstream commit e7191e517a03d025405c7df730b400ad4118474e ] Unmask the upper DSCP bits when initializing an IPv4 flow key via ip_tunnel_init_flow() before passing it to ip_route_output_key() so that in the future we could perform the FIB lookup according to the full DSCP value. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. 
Miller Stable-dep-of: b5a7b661a073 ("net: Fix netns for ip_tunnel_init_flow()") Signed-off-by: Sasha Levin (cherry picked from commit ae0710c5cc74dc0eeddf9feb2bf24c05492f358f) --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0f5cfe3caa2ed..571cf7c2fa289 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -293,7 +293,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, - RT_TOS(iph->tos), dev_net(dev), + iph->tos & INET_DSCP_MASK, dev_net(dev), tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4); From ebd1563ea48cab69f72ac55cee0327fc0db4a98a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 5 Sep 2024 19:51:34 +0300 Subject: [PATCH 144/216] ipv4: ip_tunnel: Unmask upper DSCP bits in ip_md_tunnel_xmit() [ Upstream commit c34cfe72bb260fc49660d9e6a9ba95ba01669ae2 ] Unmask the upper DSCP bits when initializing an IPv4 flow key via ip_tunnel_init_flow() before passing it to ip_route_output_key() so that in the future we could perform the FIB lookup according to the full DSCP value. Note that the 'tos' variable includes the full DS field. Either the one specified via the tunnel key or the one inherited from the inner packet. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. 
Miller Stable-dep-of: b5a7b661a073 ("net: Fix netns for ip_tunnel_init_flow()") Signed-off-by: Sasha Levin (cherry picked from commit 3b1a7fb74ab1804e0dde0a02c16df949e8809a8b) --- net/ipv4/ip_tunnel.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 571cf7c2fa289..b5437755365cc 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -43,6 +43,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -609,9 +610,9 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, - tunnel_id_to_key32(key->tun_id), RT_TOS(tos), - dev_net(dev), 0, skb->mark, skb_get_hash(skb), - key->flow_flags); + tunnel_id_to_key32(key->tun_id), + tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark, + skb_get_hash(skb), key->flow_flags); if (!tunnel_hlen) tunnel_hlen = ip_encap_hlen(&tun_info->encap); From ee5fc76bf1d9628920330b3db8ab97064d9831c2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 5 Sep 2024 19:51:35 +0300 Subject: [PATCH 145/216] ipv4: ip_tunnel: Unmask upper DSCP bits in ip_tunnel_xmit() [ Upstream commit c2b639f9f3b7a058ca9c7349b096f355773f2cd8 ] Unmask the upper DSCP bits when initializing an IPv4 flow key via ip_tunnel_init_flow() before passing it to ip_route_output_key() so that in the future we could perform the FIB lookup according to the full DSCP value. Note that the 'tos' variable includes the full DS field. Either the one specified as part of the tunnel parameters or the one inherited from the inner packet. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: David S. 
Miller Stable-dep-of: b5a7b661a073 ("net: Fix netns for ip_tunnel_init_flow()") Signed-off-by: Sasha Levin (cherry picked from commit 7e9aa1a065dc1409c4081384bc5decbbe8c22291) --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index b5437755365cc..fd8923561b184 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -773,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), + tunnel->parms.o_key, tos & INET_DSCP_MASK, dev_net(dev), READ_ONCE(tunnel->parms.link), tunnel->fwmark, skb_get_hash(skb), 0); From 2eca73ad0da7bc5ca31762f88d8f7d90f59b18b0 Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Thu, 19 Dec 2024 21:03:36 +0800 Subject: [PATCH 146/216] net: Fix netns for ip_tunnel_init_flow() [ Upstream commit b5a7b661a073727219fedc35f5619f62418ffe72 ] The device denoted by tunnel->parms.link resides in the underlay net namespace. Therefore pass tunnel->net to ip_tunnel_init_flow(). 
Fixes: db53cd3d88dc ("net: Handle l3mdev in ip_tunnel_init_flow") Signed-off-by: Xiao Liang Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20241219130336.103839-1-shaw.leon@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 36eff8669b74d0edc9acec1da6724a5cfe94d781) --- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 3 +-- net/ipv4/ip_tunnel.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index dcd198104141f..fa3fef2b74db0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -423,8 +423,7 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0, - 0); + 0, 0, tun->net, parms.link, tun->fwmark, 0, 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fd8923561b184..dd1803bf9c5c6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -294,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, - iph->tos & INET_DSCP_MASK, dev_net(dev), + iph->tos & INET_DSCP_MASK, tunnel->net, tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4); @@ -611,7 +611,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), - tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark, + tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark, skb_get_hash(skb), key->flow_flags); if (!tunnel_hlen) @@ -774,7 +774,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct 
net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, tos & INET_DSCP_MASK, - dev_net(dev), READ_ONCE(tunnel->parms.link), + tunnel->net, READ_ONCE(tunnel->parms.link), tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) From b6aa53b68b0e35ba8eb95d3ce5a56bd072e18707 Mon Sep 17 00:00:00 2001 From: Ilya Shchipletsov Date: Thu, 19 Dec 2024 08:23:07 +0000 Subject: [PATCH 147/216] netrom: check buffer length before accessing it [ Upstream commit a4fd163aed2edd967a244499754dec991d8b4c7d ] Syzkaller reports an uninit value read from ax25cmp when sending raw message through ieee802154 implementation. ===================================================== BUG: KMSAN: uninit-value in ax25cmp+0x3a5/0x460 net/ax25/ax25_addr.c:119 ax25cmp+0x3a5/0x460 net/ax25/ax25_addr.c:119 nr_dev_get+0x20e/0x450 net/netrom/nr_route.c:601 nr_route_frame+0x1a2/0xfc0 net/netrom/nr_route.c:774 nr_xmit+0x5a/0x1c0 net/netrom/nr_dev.c:144 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3548 [inline] dev_hard_start_xmit+0x247/0xa10 net/core/dev.c:3564 __dev_queue_xmit+0x33b8/0x5130 net/core/dev.c:4349 dev_queue_xmit include/linux/netdevice.h:3134 [inline] raw_sendmsg+0x654/0xc10 net/ieee802154/socket.c:299 ieee802154_sock_sendmsg+0x91/0xc0 net/ieee802154/socket.c:96 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2674 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x129/0xa70 
mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5e9/0xb10 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x318/0x740 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6334 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2780 sock_alloc_send_skb include/net/sock.h:1884 [inline] raw_sendmsg+0x36d/0xc10 net/ieee802154/socket.c:282 ieee802154_sock_sendmsg+0x91/0xc0 net/ieee802154/socket.c:96 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2674 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 0 PID: 5037 Comm: syz-executor166 Not tainted 6.7.0-rc7-syzkaller-00003-gfbafc3e621c3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 ===================================================== This issue occurs because the skb buffer is too small, and it's actual allocation is aligned. This hides an actual issue, which is that nr_route_frame does not validate the buffer size before using it. Fix this issue by checking skb->len before accessing any fields in skb->data. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Co-developed-by: Nikita Marushkin Signed-off-by: Nikita Marushkin Signed-off-by: Ilya Shchipletsov Link: https://patch.msgid.link/20241219082308.3942-1-rabbelkin@mail.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit f647d72245aadce30618f4c8fd3803904418dbec) --- net/netrom/nr_route.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index bd2b17b219ae9..0b270893ee14c 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -754,6 +754,12 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) int ret; struct sk_buff *skbn; + /* + * Reject malformed packets early. Check that it contains at least 2 + * addresses and 1 byte more for Time-To-Live + */ + if (skb->len < 2 * sizeof(ax25_address) + 1) + return 0; nr_src = (ax25_address *)(skb->data + 0); nr_dest = (ax25_address *)(skb->data + 7); From bcb3065948844a531b73aa875689f6ef5ed4a1d0 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Fri, 20 Dec 2024 10:15:02 +0200 Subject: [PATCH 148/216] net/mlx5: DR, select MSIX vector 0 for completion queue creation [ Upstream commit 050a4c011b0dfeb91664a5d7bd3647ff38db08ce ] When creating a software steering completion queue (CQ), an arbitrary MSIX vector n is selected. This results in the CQ sharing the same Ethernet traffic channel n associated with the chosen vector. However, the value of n is often unpredictable, which can introduce complications for interrupt monitoring and verification tools. Moreover, SW steering uses polling rather than event-driven interrupts. Therefore, there is no need to select any MSIX vector other than the existing vector 0 for CQ creation. In light of these factors, and to enhance predictability, we modify the code to consistently select MSIX vector 0 for CQ creation. 
Fixes: 297cccebdc5a ("net/mlx5: DR, Expose an internal API to issue RDMA operations") Signed-off-by: Shahar Shitrit Reviewed-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241220081505.1286093-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 43e589ab372f0e6452c28a7759111b1f2993f99d) --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 6fa06ba2d3465..f57c84e5128bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -1067,7 +1067,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, int inlen, err, eqn; void *cqc, *in; __be64 *pas; - int vector; u32 i; cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -1096,8 +1095,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, if (!in) goto err_cqwq; - vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); - err = mlx5_comp_eqn_get(mdev, vector, &eqn); + err = mlx5_comp_eqn_get(mdev, 0, &eqn); if (err) { kvfree(in); goto err_cqwq; From df1ca845be58ccd9be0f566f15b7db439d310291 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 20 Dec 2024 10:15:03 +0200 Subject: [PATCH 149/216] net/mlx5e: macsec: Maintain TX SA from encoding_sa [ Upstream commit 8c6254479b3d5bd788d2b5fefaa48fb194331ed0 ] In MACsec, it is possible to create multiple active TX SAs on a SC, but only one such SA can be used at a time for transmission. This SA is selected through the encoding_sa link parameter. When there are 2 or more active TX SAs configured (encoding_sa=0): ip macsec add macsec0 tx sa 0 pn 1 on key 00 ip macsec add macsec0 tx sa 1 pn 1 on key 00 ... the traffic should be still sent via TX SA 0 as the encoding_sa was not changed. 
However, the driver ignores the encoding_sa and overrides it to SA 1 by installing the flow steering id of the newly created TX SA into the SCI -> flow steering id hash map. The future packet tx descriptors will point to the incorrect flow steering rule (SA 1). This patch fixes the issue by avoiding the creation of the flow steering rule for an active TX SA that is not the encoding_sa. The driver side tx_sa object and the FW side macsec object are still created. When the encoding_sa link parameter is changed to another active TX SA, only the new flow steering rule will be created in the mlx5e_macsec_upd_txsa() handler. Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Reviewed-by: Lior Nahmanson Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241220081505.1286093-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit e66a99b9177bf57f4370d93e201228b8c294aac0) --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index cc9bcc4200324..6ab02f3fc2912 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -339,9 +339,13 @@ static int mlx5e_macsec_init_sa_fs(struct macsec_context *ctx, { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5_macsec_fs *macsec_fs = priv->mdev->macsec_fs; + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; struct mlx5_macsec_rule_attrs rule_attrs; union mlx5_macsec_rule *macsec_rule; + if (is_tx && tx_sc->encoding_sa != sa->assoc_num) + return 0; + rule_attrs.macsec_obj_id = sa->macsec_obj_id; rule_attrs.sci = sa->sci; rule_attrs.assoc_num = sa->assoc_num; From 978e30177a0fb598957b5fa722dd68e28c22ec8d Mon Sep 17 00:00:00 2001 
From: Jianbo Liu Date: Fri, 20 Dec 2024 10:15:04 +0200 Subject: [PATCH 150/216] net/mlx5e: Skip restore TC rules for vport rep without loaded flag [ Upstream commit 5a03b368562a7ff5f5f1f63b5adf8309cbdbd5be ] During driver unload, unregister_netdev is called after unloading vport rep. So, the mlx5e_rep_priv is already freed while trying to get rpriv->netdev, or walk rpriv->tc_ht, which results in use-after-free. So add the checking to make sure access the data of vport rep which is still loaded. Fixes: d1569537a837 ("net/mlx5e: Modify and restore TC rules for IPSec TX rules") Signed-off-by: Jianbo Liu Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241220081505.1286093-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 3e45dd1622a2c1a83c11bf42fdd8c1810123d6c0) --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 --- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 13b5916b64e22..eed8fcde26138 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -150,11 +150,11 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) unsigned long i; int err; - xa_for_each(&esw->offloads.vport_reps, i, rep) { - rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev) + mlx5_esw_for_each_rep(esw, i, rep) { + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; + rpriv = rep->rep_data[REP_ETH].priv; rhashtable_walk_enter(&rpriv->tc_ht, &iter); rhashtable_walk_start(&iter); while ((flow = rhashtable_walk_next(&iter)) != NULL) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9b771b572593b..3e58e731b5697 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -713,6 +713,9 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base) +\ (last) - 1) +#define mlx5_esw_for_each_rep(esw, i, rep) \ + xa_for_each(&((esw)->offloads.vport_reps), i, rep) + struct mlx5_eswitch *__must_check mlx5_devlink_eswitch_get(struct devlink *devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 58529d1a98b37..7eba3a5bb97ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -52,9 +52,6 @@ #include "lag/lag.h" #include "en/tc/post_meter.h" -#define mlx5_esw_for_each_rep(esw, i, rep) \ - xa_for_each(&((esw)->offloads.vport_reps), i, rep) - /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. */ From befba0bdacf78ae9ccb5a0939df84dd78ee4a797 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 19 Dec 2024 16:00:19 -0500 Subject: [PATCH 151/216] drm/i915/dg1: Fix power gate sequence. [ Upstream commit 20e7c5313ffbf11c34a46395345677adbe890bee ] sub-pipe PG is not present on DG1. Setting these bits can disable other power gates and cause GPU hangs on video playbacks. 
VLK: 16314, 4304 Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13381 Fixes: 85a12d7eb8fe ("drm/i915/tgl: Fix Media power gate sequence.") Cc: Vinay Belgaumkar Cc: Himal Prasad Ghimiray Reviewed-by: Vinay Belgaumkar Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20241219210019.70532-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit de7061947b4ed4be857d452c60d5fb795831d79e) Signed-off-by: Tvrtko Ursulin Signed-off-by: Sasha Levin (cherry picked from commit c9b344ada5fdda9db8382a68cd82da873f6c4e58) --- drivers/gpu/drm/i915/gt/intel_rc6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 8f4b3c8af09cc..b542214ad8071 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -133,7 +133,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; - if (GRAPHICS_VER(gt->i915) >= 12) { + if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) if (HAS_ENGINE(gt, _VCS(i))) pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | From 525a7a2919e7a965d09297bee4e4e22f516c18a4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 21 Dec 2024 00:29:20 +0100 Subject: [PATCH 152/216] netfilter: nft_set_hash: unaligned atomic read on struct nft_set_ext [ Upstream commit 542ed8145e6f9392e3d0a86a0e9027d2ffd183e4 ] Access to genmask field in struct nft_set_ext results in unaligned atomic read: [ 72.130109] Unable to handle kernel paging request at virtual address ffff0000c2bb708c [ 72.131036] Mem abort info: [ 72.131213] ESR = 0x0000000096000021 [ 72.131446] EC = 0x25: DABT (current EL), IL = 32 bits [ 72.132209] SET = 0, FnV = 0 [ 72.133216] EA = 0, S1PTW = 0 [ 72.134080] FSC = 0x21: alignment fault [ 72.135593] Data abort info: [ 72.137194] ISV = 0, ISS = 0x00000021, ISS2 = 0x00000000 [ 
72.142351] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 72.145989] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 72.150115] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000237d27000 [ 72.154893] [ffff0000c2bb708c] pgd=0000000000000000, p4d=180000023ffff403, pud=180000023f84b403, pmd=180000023f835403, +pte=0068000102bb7707 [ 72.163021] Internal error: Oops: 0000000096000021 [#1] SMP [...] [ 72.170041] CPU: 7 UID: 0 PID: 54 Comm: kworker/7:0 Tainted: G E 6.13.0-rc3+ #2 [ 72.170509] Tainted: [E]=UNSIGNED_MODULE [ 72.170720] Hardware name: QEMU QEMU Virtual Machine, BIOS edk2-stable202302-for-qemu 03/01/2023 [ 72.171192] Workqueue: events_power_efficient nft_rhash_gc [nf_tables] [ 72.171552] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 72.171915] pc : nft_rhash_gc+0x200/0x2d8 [nf_tables] [ 72.172166] lr : nft_rhash_gc+0x128/0x2d8 [nf_tables] [ 72.172546] sp : ffff800081f2bce0 [ 72.172724] x29: ffff800081f2bd40 x28: ffff0000c2bb708c x27: 0000000000000038 [ 72.173078] x26: ffff0000c6780ef0 x25: ffff0000c643df00 x24: ffff0000c6778f78 [ 72.173431] x23: 000000000000001a x22: ffff0000c4b1f000 x21: ffff0000c6780f78 [ 72.173782] x20: ffff0000c2bb70dc x19: ffff0000c2bb7080 x18: 0000000000000000 [ 72.174135] x17: ffff0000c0a4e1c0 x16: 0000000000003000 x15: 0000ac26d173b978 [ 72.174485] x14: ffffffffffffffff x13: 0000000000000030 x12: ffff0000c6780ef0 [ 72.174841] x11: 0000000000000000 x10: ffff800081f2bcf8 x9 : ffff0000c3000000 [ 72.175193] x8 : 00000000000004be x7 : 0000000000000000 x6 : 0000000000000000 [ 72.175544] x5 : 0000000000000040 x4 : ffff0000c3000010 x3 : 0000000000000000 [ 72.175871] x2 : 0000000000003a98 x1 : ffff0000c2bb708c x0 : 0000000000000004 [ 72.176207] Call trace: [ 72.176316] nft_rhash_gc+0x200/0x2d8 [nf_tables] (P) [ 72.176653] process_one_work+0x178/0x3d0 [ 72.176831] worker_thread+0x200/0x3f0 [ 72.176995] kthread+0xe8/0xf8 [ 72.177130] ret_from_fork+0x10/0x20 [ 72.177289] Code: 54fff984 d503201f d2800080 91003261 (f820303f) [ 
72.177557] ---[ end trace 0000000000000000 ]--- Align struct nft_set_ext to word size to address this and document it. pahole reports that this increases the size of elements for rhash and pipapo by 8 bytes on x86_64. Fixes: 7ffc7481153b ("netfilter: nft_set_hash: skip duplicated elements pending gc run") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (cherry picked from commit 4f49349c1963e507aa37c1ec05178faeb0103959) --- include/net/netfilter/nf_tables.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b5f9ee5810a34..8321915dddb28 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -721,15 +721,18 @@ struct nft_set_ext_tmpl { /** * struct nft_set_ext - set extensions * - * @genmask: generation mask + * @genmask: generation mask, but also flags (see NFT_SET_ELEM_DEAD_BIT) * @offset: offsets of individual extension types * @data: beginning of extension data + * + * This structure must be aligned to word size, otherwise atomic bitops + * on genmask field can cause alignment failure on some archs. */ struct nft_set_ext { u8 genmask; u8 offset[NFT_SET_EXT_NUM]; char data[]; -}; +} __aligned(BITS_PER_LONG / 8); static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) { From af18297200b95288aa66629036cdd4bbc59cdbc9 Mon Sep 17 00:00:00 2001 From: Antonio Pastor Date: Tue, 24 Dec 2024 20:07:20 -0500 Subject: [PATCH 153/216] net: llc: reset skb->transport_header [ Upstream commit a024e377efed31ecfb39210bed562932321345b3 ] 802.2+LLC+SNAP frames received by napi_complete_done with GRO and DSA have skb->transport_header set two bytes short, or pointing 2 bytes before network_header & skb->data. As snap_rcv expects transport_header to point to SNAP header (OID:PID) after LLC processing advances offset over LLC header (llc_rcv & llc_fixup_skb), code doesn't find a match and packet is dropped.
Between napi_complete_done and snap_rcv, transport_header is not used until __netif_receive_skb_core, where originally it was being reset. Commit fda55eca5a33 ("net: introduce skb_transport_header_was_set()") only does so if not set, on the assumption the value was set correctly by GRO (and also on assumption that "network stacks usually reset the transport header anyway"). Afterwards it is moved forward by llc_fixup_skb. Locally generated traffic shows up at __netif_receive_skb_core with no transport_header set and is processed without issue. On a setup with GRO but no DSA, transport_header and network_header are both set to point to skb->data which is also correct. As issue is LLC specific, to avoid impacting non-LLC traffic, and to follow up on original assumption made on previous code change, llc_fixup_skb to reset the offset after skb pull. llc_fixup_skb assumes the LLC header is at skb->data, and by definition SNAP header immediately follows. Fixes: fda55eca5a33 ("net: introduce skb_transport_header_was_set()") Signed-off-by: Antonio Pastor Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241225010723.2830290-1-antonio.pastor@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 0c896816aa193e6459fc947747e5753c06b395b9) --- net/llc/llc_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 51bccfb00a9cd..61b0159b2fbee 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -124,8 +124,8 @@ static inline int llc_fixup_skb(struct sk_buff *skb) if (unlikely(!pskb_may_pull(skb, llc_len))) return 0; - skb->transport_header += llc_len; skb_pull(skb, llc_len); + skb_reset_transport_header(skb); if (skb->protocol == htons(ETH_P_802_2)) { __be16 pdulen; s32 data_size; From 78dab1c692a88edcffd95dd2b0c393459c4820a5 Mon Sep 17 00:00:00 2001 From: Tanya Agarwal Date: Sun, 29 Dec 2024 11:32:42 +0530 Subject: [PATCH 154/216] ALSA: usb-audio: US16x08: 
Initialize array before use [ Upstream commit b06a6187ef983f501e93faa56209169752d3bde3 ] Initialize meter_urb array before use in mixer_us16x08.c. CID 1410197: (#1 of 1): Uninitialized scalar variable (UNINIT) uninit_use_in_call: Using uninitialized value *meter_urb when calling get_meter_levels_from_urb. Coverity Link: https://scan7.scan.coverity.com/#/project-view/52849/11354?selectedIssue=1410197 Fixes: d2bb390a2081 ("ALSA: usb-audio: Tascam US-16x08 DSP mixer quirk") Signed-off-by: Tanya Agarwal Link: https://patch.msgid.link/20241229060240.1642-1-tanyaagarwal25699@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit 930f2f96734e2675853bc48809778a5770a810e9) --- sound/usb/mixer_us16x08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 6eb7d93b358d9..20ac32635f1f5 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -687,7 +687,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, struct usb_mixer_elem_info *elem = kcontrol->private_data; struct snd_usb_audio *chip = elem->head.mixer->chip; struct snd_us16x08_meter_store *store = elem->private_data; - u8 meter_urb[64]; + u8 meter_urb[64] = {0}; switch (kcontrol->private_value) { case 0: { From e3cfc39fa65e4528cc0803284dc2e789b0782c74 Mon Sep 17 00:00:00 2001 From: Vitalii Mordan Date: Fri, 27 Dec 2024 15:30:07 +0300 Subject: [PATCH 155/216] eth: bcmsysport: fix call balance of priv->clk handling routines [ Upstream commit b255ef45fcc2141c1bf98456796abb956d843a27 ] Check the return value of clk_prepare_enable to ensure that priv->clk has been successfully enabled. If priv->clk was not enabled during bcm_sysport_probe, bcm_sysport_resume, or bcm_sysport_open, it must not be disabled in any subsequent execution paths. 
Fixes: 31bc72d97656 ("net: systemport: fetch and use clock resources") Signed-off-by: Vitalii Mordan Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20241227123007.2333397-1-mordan@ispras.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit d5ea3a4d02d882179bf14b642582aaca36f3f216) --- drivers/net/ethernet/broadcom/bcmsysport.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 49e890a7e04a3..23cc2d85994e4 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1967,7 +1967,11 @@ static int bcm_sysport_open(struct net_device *dev) unsigned int i; int ret; - clk_prepare_enable(priv->clk); + ret = clk_prepare_enable(priv->clk); + if (ret) { + netdev_err(dev, "could not enable priv clock\n"); + return ret; + } /* Reset UniMAC */ umac_reset(priv); @@ -2625,7 +2629,11 @@ static int bcm_sysport_probe(struct platform_device *pdev) goto err_deregister_notifier; } - clk_prepare_enable(priv->clk); + ret = clk_prepare_enable(priv->clk); + if (ret) { + dev_err(&pdev->dev, "could not enable priv clock\n"); + goto err_deregister_netdev; + } priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK; dev_info(&pdev->dev, @@ -2639,6 +2647,8 @@ static int bcm_sysport_probe(struct platform_device *pdev) return 0; +err_deregister_netdev: + unregister_netdev(dev); err_deregister_notifier: unregister_netdevice_notifier(&priv->netdev_notifier); err_deregister_fixed_link: @@ -2810,7 +2820,12 @@ static int __maybe_unused bcm_sysport_resume(struct device *d) if (!netif_running(dev)) return 0; - clk_prepare_enable(priv->clk); + ret = clk_prepare_enable(priv->clk); + if (ret) { + netdev_err(dev, "could not enable priv clock\n"); + return ret; + } + if (priv->wolopts) clk_disable_unprepare(priv->wol_clk); From 5489d4cd35dda2f660e3fadda25c2e0ac975df3f Mon Sep 17 00:00:00 
2001 From: Joe Hattori Date: Sat, 21 Dec 2024 17:14:48 +0900 Subject: [PATCH 156/216] net: mv643xx_eth: fix an OF node reference leak [ Upstream commit ad5c318086e2e23b577eca33559c5ebf89bc7eb9 ] Current implementation of mv643xx_eth_shared_of_add_port() calls of_parse_phandle(), but does not release the refcount on error. Call of_node_put() in the error path and in mv643xx_eth_shared_of_remove(). This bug was found by an experimental verification tool that I am developing. Fixes: 76723bca2802 ("net: mv643xx_eth: add DT parsing support") Signed-off-by: Joe Hattori Link: https://patch.msgid.link/20241221081448.3313163-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 313474b108977ecb3cc00499dc7ce7b5639244de) --- drivers/net/ethernet/marvell/mv643xx_eth.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 3b129a1c33815..07e5051171a48 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2708,9 +2708,15 @@ static struct platform_device *port_platdev[3]; static void mv643xx_eth_shared_of_remove(void) { + struct mv643xx_eth_platform_data *pd; int n; for (n = 0; n < 3; n++) { + if (!port_platdev[n]) + continue; + pd = dev_get_platdata(&port_platdev[n]->dev); + if (pd) + of_node_put(pd->phy_node); platform_device_del(port_platdev[n]); port_platdev[n] = NULL; } @@ -2773,8 +2779,10 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, } ppdev = platform_device_alloc(MV643XX_ETH_NAME, dev_num); - if (!ppdev) - return -ENOMEM; + if (!ppdev) { + ret = -ENOMEM; + goto put_err; + } ppdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); ppdev->dev.of_node = pnp; @@ -2796,6 +2804,8 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, port_err: platform_device_put(ppdev); +put_err: + 
of_node_put(ppd.phy_node); return ret; } From 9f144597940abf8c045d09e6d4fe3612d568e2a2 Mon Sep 17 00:00:00 2001 From: Jinjian Song Date: Tue, 24 Dec 2024 12:15:52 +0800 Subject: [PATCH 157/216] net: wwan: t7xx: Fix FSM command timeout issue [ Upstream commit 4f619d518db9cd1a933c3a095a5f95d0c1584ae8 ] When driver processes the internal state change command, it use an asynchronous thread to process the command operation. If the main thread detects that the task has timed out, the asynchronous thread will panic when executing the completion notification because the main thread completion object has been released. BUG: unable to handle page fault for address: fffffffffffffff8 PGD 1f283a067 P4D 1f283a067 PUD 1f283c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:complete_all+0x3e/0xa0 [...] Call Trace: ? __die_body+0x68/0xb0 ? page_fault_oops+0x379/0x3e0 ? exc_page_fault+0x69/0xa0 ? asm_exc_page_fault+0x22/0x30 ? complete_all+0x3e/0xa0 fsm_main_thread+0xa3/0x9c0 [mtk_t7xx (HASH:1400 5)] ? __pfx_autoremove_wake_function+0x10/0x10 kthread+0xd8/0x110 ? __pfx_fsm_main_thread+0x10/0x10 [mtk_t7xx (HASH:1400 5)] ? __pfx_kthread+0x10/0x10 ret_from_fork+0x38/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 [...] 
CR2: fffffffffffffff8 ---[ end trace 0000000000000000 ]--- Use the reference counter to ensure safe release as Sergey suggests: https://lore.kernel.org/all/da90f64c-260a-4329-87bf-1f9ff20a5951@gmail.com/ Fixes: 13e920d93e37 ("net: wwan: t7xx: Add core components") Signed-off-by: Jinjian Song Acked-by: Sergey Ryazanov Link: https://patch.msgid.link/20241224041552.8711-1-jinjian.song@fibocom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 0cd3bde081cd3452c875fa1e5c55834c670d6e05) --- drivers/net/wwan/t7xx/t7xx_state_monitor.c | 26 ++++++++++++++-------- drivers/net/wwan/t7xx/t7xx_state_monitor.h | 5 +++-- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.c b/drivers/net/wwan/t7xx/t7xx_state_monitor.c index 80edb8e75a6ad..64868df3640d1 100644 --- a/drivers/net/wwan/t7xx/t7xx_state_monitor.c +++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.c @@ -97,14 +97,21 @@ void t7xx_fsm_broadcast_state(struct t7xx_fsm_ctl *ctl, enum md_state state) fsm_state_notify(ctl->md, state); } +static void fsm_release_command(struct kref *ref) +{ + struct t7xx_fsm_command *cmd = container_of(ref, typeof(*cmd), refcnt); + + kfree(cmd); +} + static void fsm_finish_command(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_command *cmd, int result) { if (cmd->flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) { - *cmd->ret = result; - complete_all(cmd->done); + cmd->result = result; + complete_all(&cmd->done); } - kfree(cmd); + kref_put(&cmd->refcnt, fsm_release_command); } static void fsm_del_kf_event(struct t7xx_fsm_event *event) @@ -396,7 +403,6 @@ static int fsm_main_thread(void *data) int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id, unsigned int flag) { - DECLARE_COMPLETION_ONSTACK(done); struct t7xx_fsm_command *cmd; unsigned long flags; int ret; @@ -408,11 +414,13 @@ int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id INIT_LIST_HEAD(&cmd->entry); 
cmd->cmd_id = cmd_id; cmd->flag = flag; + kref_init(&cmd->refcnt); if (flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) { - cmd->done = &done; - cmd->ret = &ret; + init_completion(&cmd->done); + kref_get(&cmd->refcnt); } + kref_get(&cmd->refcnt); spin_lock_irqsave(&ctl->command_lock, flags); list_add_tail(&cmd->entry, &ctl->command_queue); spin_unlock_irqrestore(&ctl->command_lock, flags); @@ -422,11 +430,11 @@ int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id if (flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) { unsigned long wait_ret; - wait_ret = wait_for_completion_timeout(&done, + wait_ret = wait_for_completion_timeout(&cmd->done, msecs_to_jiffies(FSM_CMD_TIMEOUT_MS)); - if (!wait_ret) - return -ETIMEDOUT; + ret = wait_ret ? cmd->result : -ETIMEDOUT; + kref_put(&cmd->refcnt, fsm_release_command); return ret; } diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.h b/drivers/net/wwan/t7xx/t7xx_state_monitor.h index b6e76f3903c89..74f96fd2605e8 100644 --- a/drivers/net/wwan/t7xx/t7xx_state_monitor.h +++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.h @@ -109,8 +109,9 @@ struct t7xx_fsm_command { struct list_head entry; enum t7xx_fsm_cmd_state cmd_id; unsigned int flag; - struct completion *done; - int *ret; + struct completion done; + int result; + struct kref refcnt; }; struct t7xx_fsm_notifier { From 2fdc6c53d6adce5befaa7ab2837a8f2c1dcbaea6 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 31 Dec 2024 09:34:16 +0800 Subject: [PATCH 158/216] RDMA/rtrs: Ensure 'ib_sge list' is accessible [ Upstream commit fb514b31395946022f13a08e06a435f53cf9e8b3 ] Move the declaration of the 'ib_sge list' variable outside the 'always_invalidate' block to ensure it remains accessible for use throughout the function. Previously, 'ib_sge list' was declared within the 'always_invalidate' block, limiting its accessibility, then caused a 'BUG: kernel NULL pointer dereference'[1]. ? __die_body.cold+0x19/0x27 ? page_fault_oops+0x15a/0x2d0 ? 
search_module_extables+0x19/0x60 ? search_bpf_extables+0x5f/0x80 ? exc_page_fault+0x7e/0x180 ? asm_exc_page_fault+0x26/0x30 ? memcpy_orig+0xd5/0x140 rxe_mr_copy+0x1c3/0x200 [rdma_rxe] ? rxe_pool_get_index+0x4b/0x80 [rdma_rxe] copy_data+0xa5/0x230 [rdma_rxe] rxe_requester+0xd9b/0xf70 [rdma_rxe] ? finish_task_switch.isra.0+0x99/0x2e0 rxe_sender+0x13/0x40 [rdma_rxe] do_task+0x68/0x1e0 [rdma_rxe] process_one_work+0x177/0x330 worker_thread+0x252/0x390 ? __pfx_worker_thread+0x10/0x10 This change ensures the variable is available for subsequent operations that require it. [1] https://lore.kernel.org/linux-rdma/6a1f3e8f-deb0-49f9-bc69-a9b03ecfcda7@fujitsu.com/ Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Li Zhijian Link: https://patch.msgid.link/20241231013416.1290920-1-lizhijian@fujitsu.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit b238f61cc394d5fef27b26d7d9aa383ebfddabb0) --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 758a3d9c2844d..84d1654148d76 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -346,6 +346,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, struct rtrs_srv_mr *srv_mr; bool need_inval = false; enum ib_send_flags flags; + struct ib_sge list; u32 imm; int err; @@ -398,7 +399,6 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); imm_wr.wr.next = NULL; if (always_invalidate) { - struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; srv_mr = &srv_path->mrs[id->msg_id]; From d693dd3c948280ca1f2953834ca772903131c1e8 Mon Sep 17 00:00:00 2001 From: Liang Jie Date: Mon, 30 Dec 2024 17:37:09 +0800 Subject: [PATCH 159/216] net: sfc: Correct key_len for efx_tc_ct_zone_ht_params [ 
Upstream commit a8620de72e5676993ec3a3b975f7c10908f5f60f ] In efx_tc_ct_zone_ht_params, the key_len was previously set to offsetof(struct efx_tc_ct_zone, linkage). This calculation is incorrect because it includes any padding between the zone field and the linkage field due to structure alignment, which can vary between systems. This patch updates key_len to use sizeof_field(struct efx_tc_ct_zone, zone) , ensuring that the hash table correctly uses the zone as the key. This fix prevents potential hash lookup errors and improves connection tracking reliability. Fixes: c3bb5c6acd4e ("sfc: functions to register for conntrack zone offload") Signed-off-by: Liang Jie Acked-by: Edward Cree Link: https://patch.msgid.link/20241230093709.3226854-1-buaajxlj@163.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 9eea3703c882876e5713071d51a510fecd3471d5) --- drivers/net/ethernet/sfc/tc_conntrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/tc_conntrack.c b/drivers/net/ethernet/sfc/tc_conntrack.c index 44bb57670340d..109d2aa34ae33 100644 --- a/drivers/net/ethernet/sfc/tc_conntrack.c +++ b/drivers/net/ethernet/sfc/tc_conntrack.c @@ -16,7 +16,7 @@ static int efx_tc_flow_block(enum tc_setup_type type, void *type_data, void *cb_priv); static const struct rhashtable_params efx_tc_ct_zone_ht_params = { - .key_len = offsetof(struct efx_tc_ct_zone, linkage), + .key_len = sizeof_field(struct efx_tc_ct_zone, zone), .key_offset = 0, .head_offset = offsetof(struct efx_tc_ct_zone, linkage), }; From 671e939d37df4445f5ce50ddd63beebea557bc0a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 1 Jan 2025 11:47:40 -0500 Subject: [PATCH 160/216] net: reenable NETIF_F_IPV6_CSUM offload for BIG TCP packets [ Upstream commit 68e068cabd2c6c533ef934c2e5151609cf6ecc6d ] The blamed commit disabled hardware offload of IPv6 packets with extension headers on devices that advertise NETIF_F_IPV6_CSUM, based on the
definition of that feature in skbuff.h: * * - %NETIF_F_IPV6_CSUM * - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv6. These are specifically * unencapsulated packets of the form IPv6|TCP or * IPv6|UDP where the Next Header field in the IPv6 * header is either TCP or UDP. IPv6 extension headers * are not supported with this feature. This feature * cannot be set in features for a device with * NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). The change causes skb_warn_bad_offload to fire for BIG TCP packets. [ 496.310233] WARNING: CPU: 13 PID: 23472 at net/core/dev.c:3129 skb_warn_bad_offload+0xc4/0xe0 [ 496.310297] ? skb_warn_bad_offload+0xc4/0xe0 [ 496.310300] skb_checksum_help+0x129/0x1f0 [ 496.310303] skb_csum_hwoffload_help+0x150/0x1b0 [ 496.310306] validate_xmit_skb+0x159/0x270 [ 496.310309] validate_xmit_skb_list+0x41/0x70 [ 496.310312] sch_direct_xmit+0x5c/0x250 [ 496.310317] __qdisc_run+0x388/0x620 BIG TCP introduced an IPV6_TLV_JUMBO IPv6 extension header to communicate packet length, as this is an IPv6 jumbogram. But, the feature is only enabled on devices that support BIG TCP TSO. The header is only present for PF_PACKET taps like tcpdump, and not transmitted by physical devices. For this specific case of extension headers that are not transmitted, return to the situation before the blamed commit and support hardware offload. ipv6_has_hopopt_jumbo() tests not only whether this header is present, but also that it is the only extension header before a terminal (L4) header. 
Fixes: 04c20a9356f2 ("net: skip offload for NETIF_F_IPV6_CSUM if ipv6 header contains extension") Reported-by: syzbot Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89iK1hdC3Nt8KPhOtTF8vCPc1AHDCtse_BTNki1pWxAByTQ@mail.gmail.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250101164909.1331680-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 95ccf006bbc8b59044313b8c309dcf29c546abd4) --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4beb9acf2c183..69da7b009f8b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3628,8 +3628,10 @@ int skb_csum_hwoffload_help(struct sk_buff *skb, if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { if (vlan_get_protocol(skb) == htons(ETH_P_IPV6) && - skb_network_header_len(skb) != sizeof(struct ipv6hdr)) + skb_network_header_len(skb) != sizeof(struct ipv6hdr) && + !ipv6_has_hopopt_jumbo(skb)) goto sw_checksum; + switch (skb->csum_offset) { case offsetof(struct tcphdr, check): case offsetof(struct udphdr, check): From b2211947962b2f9dcbe432711c2b163e23cf614f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Dec 2024 16:05:27 +0000 Subject: [PATCH 161/216] net: restrict SO_REUSEPORT to inet sockets [ Upstream commit 5b0af621c3f6ef9261cf6067812f2fd9943acb4b ] After blamed commit, crypto sockets could accidentally be destroyed from RCU callback, as spotted by syzbot [1]. Trying to acquire a mutex in RCU callback is not allowed. Restrict SO_REUSEPORT socket option to inet sockets. v1 of this patch supported TCP, UDP and SCTP sockets, but fcnal-test.sh test needed RAW and ICMP support.
[1] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:562 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 24, name: ksoftirqd/1 preempt_count: 100, expected: 0 RCU nest depth: 0, expected: 0 1 lock held by ksoftirqd/1/24: #0: ffffffff8e937ba0 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline] #0: ffffffff8e937ba0 (rcu_callback){....}-{0:0}, at: rcu_do_batch kernel/rcu/tree.c:2561 [inline] #0: ffffffff8e937ba0 (rcu_callback){....}-{0:0}, at: rcu_core+0xa37/0x17a0 kernel/rcu/tree.c:2823 Preemption disabled at: [] softirq_handle_begin kernel/softirq.c:402 [inline] [] handle_softirqs+0x128/0x9b0 kernel/softirq.c:537 CPU: 1 UID: 0 PID: 24 Comm: ksoftirqd/1 Not tainted 6.13.0-rc3-syzkaller-00174-ga024e377efed #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 __might_resched+0x5d4/0x780 kernel/sched/core.c:8758 __mutex_lock_common kernel/locking/mutex.c:562 [inline] __mutex_lock+0x131/0xee0 kernel/locking/mutex.c:735 crypto_put_default_null_skcipher+0x18/0x70 crypto/crypto_null.c:179 aead_release+0x3d/0x50 crypto/algif_aead.c:489 alg_do_release crypto/af_alg.c:118 [inline] alg_sock_destruct+0x86/0xc0 crypto/af_alg.c:502 __sk_destruct+0x58/0x5f0 net/core/sock.c:2260 rcu_do_batch kernel/rcu/tree.c:2567 [inline] rcu_core+0xaaa/0x17a0 kernel/rcu/tree.c:2823 handle_softirqs+0x2d4/0x9b0 kernel/softirq.c:561 run_ksoftirqd+0xca/0x130 kernel/softirq.c:950 smpboot_thread_fn+0x544/0xa30 kernel/smpboot.c:164 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: 8c7138b33e5c ("net: Unpublish sk from sk_reuseport_cb before call_rcu") Reported-by: syzbot+b3e02953598f447d4d2a@syzkaller.appspotmail.com Closes: 
https://lore.kernel.org/netdev/6772f2f4.050a0220.2f3838.04cb.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241231160527.3994168-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit ad91a2dacbf8c26a446658cdd55e8324dfeff1e7) --- net/core/sock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 9bbca86da357d..be9fdefc1904d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1133,7 +1133,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname, sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; case SO_REUSEPORT: - sk->sk_reuseport = valbool; + if (valbool && !sk_is_inet(sk)) + ret = -EOPNOTSUPP; + else + sk->sk_reuseport = valbool; break; case SO_TYPE: case SO_PROTOCOL: From 194d97052a1b2b094f6686cff84a6b21ec83410f Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 29 Dec 2024 17:46:58 +0100 Subject: [PATCH 162/216] net: wwan: iosm: Properly check for valid exec stage in ipc_mmio_init() [ Upstream commit a7af435df0e04cfb4a4004136d597c42639a2ae7 ] ipc_mmio_init() used the post-decrement operator in its loop continuing condition of "retries" counter being "> 0", which meant that when this condition caused loop exit "retries" counter reached -1. But the later valid exec stage failure check only tests for "retries" counter being exactly zero, so it didn't trigger in this case (but would wrongly trigger if the code reaches a valid exec stage in the very last loop iteration). Fix this by using the pre-decrement operator instead, so the loop counter is exactly zero on valid exec stage failure. Fixes: dc0514f5d828 ("net: iosm: mmio scratchpad") Signed-off-by: Maciej S. 
Szmigiero Link: https://patch.msgid.link/8b19125a825f9dcdd81c667c1e5c48ba28d505a6.1735490770.git.mail@maciej.szmigiero.name Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 23f2e7a13fa48f906c1c851f8f948e28ff6b4637) --- drivers/net/wwan/iosm/iosm_ipc_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.c b/drivers/net/wwan/iosm/iosm_ipc_mmio.c index 63eb08c43c051..6764c13530b9b 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mmio.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.c @@ -104,7 +104,7 @@ struct iosm_mmio *ipc_mmio_init(void __iomem *mmio, struct device *dev) break; msleep(20); - } while (retries-- > 0); + } while (--retries > 0); if (!retries) { dev_err(ipc_mmio->dev, "invalid exec stage %X", stage); From 25b088a53f90be7ad4ea58790eaeebffa763d183 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Dec 2024 16:10:03 +0000 Subject: [PATCH 163/216] af_packet: fix vlan_get_tci() vs MSG_PEEK [ Upstream commit 77ee7a6d16b6ec07b5c3ae2b6b60a24c1afbed09 ] Blamed commit forgot MSG_PEEK case, allowing a crash [1] as found by syzbot. Rework vlan_get_tci() to not touch skb at all, so that it can be used from many cpus on the same skb. Add a const qualifier to skb argument. [1] skbuff: skb_under_panic: text:ffffffff8a8da482 len:32 put:14 head:ffff88807a1d5800 data:ffff88807a1d5810 tail:0x14 end:0x140 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:206 ! 
Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 0 UID: 0 PID: 5880 Comm: syz-executor172 Not tainted 6.13.0-rc3-syzkaller-00762-g9268abe611b0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:skb_panic net/core/skbuff.c:206 [inline] RIP: 0010:skb_under_panic+0x14b/0x150 net/core/skbuff.c:216 Code: 0b 8d 48 c7 c6 9e 6c 26 8e 48 8b 54 24 08 8b 0c 24 44 8b 44 24 04 4d 89 e9 50 41 54 41 57 41 56 e8 3a 5a 79 f7 48 83 c4 20 90 <0f> 0b 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 RSP: 0018:ffffc90003baf5b8 EFLAGS: 00010286 RAX: 0000000000000087 RBX: dffffc0000000000 RCX: 8565c1eec37aa000 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffff88802616fb50 R08: ffffffff817f0a4c R09: 1ffff92000775e50 R10: dffffc0000000000 R11: fffff52000775e51 R12: 0000000000000140 R13: ffff88807a1d5800 R14: ffff88807a1d5810 R15: 0000000000000014 FS: 00007fa03261f6c0(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd65753000 CR3: 0000000031720000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_push+0xe5/0x100 net/core/skbuff.c:2636 vlan_get_tci+0x272/0x550 net/packet/af_packet.c:565 packet_recvmsg+0x13c9/0x1ef0 net/packet/af_packet.c:3616 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0x22f/0x280 net/socket.c:1066 ____sys_recvmsg+0x1c6/0x480 net/socket.c:2814 ___sys_recvmsg net/socket.c:2856 [inline] do_recvmmsg+0x426/0xab0 net/socket.c:2951 __sys_recvmmsg net/socket.c:3025 [inline] __do_sys_recvmmsg net/socket.c:3048 [inline] __se_sys_recvmmsg net/socket.c:3041 [inline] __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3041 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 Fixes: 79eecf631c14 ("af_packet: Handle outgoing VLAN packets without hardware 
offloading") Reported-by: syzbot+8400677f3fd43f37d3bc@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6772c485.050a0220.2f3838.04c6.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Chengen Du Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241230161004.2681892-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 7aa78d0d8546d8ce5a764add3f55d72e707c18f1) --- net/packet/af_packet.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 56e3ae3b6be93..96eca4a290ad4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -538,10 +538,8 @@ static void *packet_current_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, rb->head, status); } -static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev) +static u16 vlan_get_tci(const struct sk_buff *skb, struct net_device *dev) { - u8 *skb_orig_data = skb->data; - int skb_orig_len = skb->len; struct vlan_hdr vhdr, *vh; unsigned int header_len; @@ -562,12 +560,8 @@ static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev) else return 0; - skb_push(skb, skb->data - skb_mac_header(skb)); - vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr); - if (skb_orig_data != skb->data) { - skb->data = skb_orig_data; - skb->len = skb_orig_len; - } + vh = skb_header_pointer(skb, skb_mac_offset(skb) + header_len, + sizeof(vhdr), &vhdr); if (unlikely(!vh)) return 0; From 131d85f52c12dc01a51590d7f825e2924d87128b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Dec 2024 16:10:04 +0000 Subject: [PATCH 164/216] af_packet: fix vlan_get_protocol_dgram() vs MSG_PEEK [ Upstream commit f91a5b8089389eb408501af2762f168c3aaa7b79 ] Blamed commit forgot MSG_PEEK case, allowing a crash [1] as found by syzbot. Rework vlan_get_protocol_dgram() to not touch skb at all, so that it can be used from many cpus on the same skb. 
Add a const qualifier to skb argument. [1] skbuff: skb_under_panic: text:ffffffff8a8ccd05 len:29 put:14 head:ffff88807fc8e400 data:ffff88807fc8e3f4 tail:0x11 end:0x140 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:206 ! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 UID: 0 PID: 5892 Comm: syz-executor883 Not tainted 6.13.0-rc4-syzkaller-00054-gd6ef8b40d075 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:skb_panic net/core/skbuff.c:206 [inline] RIP: 0010:skb_under_panic+0x14b/0x150 net/core/skbuff.c:216 Code: 0b 8d 48 c7 c6 86 d5 25 8e 48 8b 54 24 08 8b 0c 24 44 8b 44 24 04 4d 89 e9 50 41 54 41 57 41 56 e8 5a 69 79 f7 48 83 c4 20 90 <0f> 0b 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 RSP: 0018:ffffc900038d7638 EFLAGS: 00010282 RAX: 0000000000000087 RBX: dffffc0000000000 RCX: 609ffd18ea660600 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffff88802483c8d0 R08: ffffffff817f0a8c R09: 1ffff9200071ae60 R10: dffffc0000000000 R11: fffff5200071ae61 R12: 0000000000000140 R13: ffff88807fc8e400 R14: ffff88807fc8e3f4 R15: 0000000000000011 FS: 00007fbac5e006c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbac5e00d58 CR3: 000000001238e000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_push+0xe5/0x100 net/core/skbuff.c:2636 vlan_get_protocol_dgram+0x165/0x290 net/packet/af_packet.c:585 packet_recvmsg+0x948/0x1ef0 net/packet/af_packet.c:3552 sock_recvmsg_nosec net/socket.c:1033 [inline] sock_recvmsg+0x22f/0x280 net/socket.c:1055 ____sys_recvmsg+0x1c6/0x480 net/socket.c:2803 ___sys_recvmsg net/socket.c:2845 [inline] do_recvmmsg+0x426/0xab0 net/socket.c:2940 __sys_recvmmsg net/socket.c:3014 [inline] __do_sys_recvmmsg net/socket.c:3037 [inline] __se_sys_recvmmsg 
net/socket.c:3030 [inline] __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3030 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 79eecf631c14 ("af_packet: Handle outgoing VLAN packets without hardware offloading") Reported-by: syzbot+74f70bb1cb968bf09e4f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6772c485.050a0220.2f3838.04c5.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Chengen Du Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241230161004.2681892-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit a693b87692b4d7c50f4fc08a996678d60534a9da) --- include/linux/if_vlan.h | 16 +++++++++++++--- net/packet/af_packet.c | 16 ++++------------ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3028af87716e2..430749a0f362a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -585,13 +585,16 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query * @type: first vlan protocol + * @mac_offset: MAC offset * @depth: buffer to store length of eth and vlan tags in bytes * * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. 
*/ -static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, - int *depth) +static inline __be16 __vlan_get_protocol_offset(const struct sk_buff *skb, + __be16 type, + int mac_offset, + int *depth) { unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; @@ -610,7 +613,8 @@ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, do { struct vlan_hdr vhdr, *vh; - vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); + vh = skb_header_pointer(skb, mac_offset + vlan_depth, + sizeof(vhdr), &vhdr); if (unlikely(!vh || !--parse_depth)) return 0; @@ -625,6 +629,12 @@ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, return type; } +static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, + int *depth) +{ + return __vlan_get_protocol_offset(skb, type, 0, depth); +} + /** * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 96eca4a290ad4..4abf7e9ac4f2f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -568,21 +568,13 @@ static u16 vlan_get_tci(const struct sk_buff *skb, struct net_device *dev) return ntohs(vh->h_vlan_TCI); } -static __be16 vlan_get_protocol_dgram(struct sk_buff *skb) +static __be16 vlan_get_protocol_dgram(const struct sk_buff *skb) { __be16 proto = skb->protocol; - if (unlikely(eth_type_vlan(proto))) { - u8 *skb_orig_data = skb->data; - int skb_orig_len = skb->len; - - skb_push(skb, skb->data - skb_mac_header(skb)); - proto = __vlan_get_protocol(skb, proto, NULL); - if (skb_orig_data != skb->data) { - skb->data = skb_orig_data; - skb->len = skb_orig_len; - } - } + if (unlikely(eth_type_vlan(proto))) + proto = __vlan_get_protocol_offset(skb, proto, + skb_mac_offset(skb), NULL); return proto; } From 6e788f7f0e12cf302d59b39b6d7176a8abfeb4d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Dec 2024 16:28:49 +0000 Subject: 
[PATCH 165/216] ila: serialize calls to nf_register_net_hooks() [ Upstream commit 260466b576bca0081a7d4acecc8e93687aa22d0e ] syzbot found a race in ila_add_mapping() [1] commit 031ae72825ce ("ila: call nf_unregister_net_hooks() sooner") attempted to fix a similar issue. Looking at the syzbot repro, we have concurrent ILA_CMD_ADD commands. Add a mutex to make sure at most one thread is calling nf_register_net_hooks(). [1] BUG: KASAN: slab-use-after-free in rht_key_hashfn include/linux/rhashtable.h:159 [inline] BUG: KASAN: slab-use-after-free in __rhashtable_lookup.constprop.0+0x426/0x550 include/linux/rhashtable.h:604 Read of size 4 at addr ffff888028f40008 by task dhcpcd/5501 CPU: 1 UID: 0 PID: 5501 Comm: dhcpcd Not tainted 6.13.0-rc4-syzkaller-00054-gd6ef8b40d075 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xc3/0x620 mm/kasan/report.c:489 kasan_report+0xd9/0x110 mm/kasan/report.c:602 rht_key_hashfn include/linux/rhashtable.h:159 [inline] __rhashtable_lookup.constprop.0+0x426/0x550 include/linux/rhashtable.h:604 rhashtable_lookup include/linux/rhashtable.h:646 [inline] rhashtable_lookup_fast include/linux/rhashtable.h:672 [inline] ila_lookup_wildcards net/ipv6/ila/ila_xlat.c:127 [inline] ila_xlat_addr net/ipv6/ila/ila_xlat.c:652 [inline] ila_nf_input+0x1ee/0x620 net/ipv6/ila/ila_xlat.c:185 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xbb/0x200 net/netfilter/core.c:626 nf_hook.constprop.0+0x42e/0x750 include/linux/netfilter.h:269 NF_HOOK include/linux/netfilter.h:312 [inline] ipv6_rcv+0xa4/0x680 net/ipv6/ip6_input.c:309 __netif_receive_skb_one_core+0x12e/0x1e0 net/core/dev.c:5672 __netif_receive_skb+0x1d/0x160 net/core/dev.c:5785 process_backlog+0x443/0x15f0 net/core/dev.c:6117 __napi_poll.constprop.0+0xb7/0x550 
net/core/dev.c:6883 napi_poll net/core/dev.c:6952 [inline] net_rx_action+0xa94/0x1010 net/core/dev.c:7074 handle_softirqs+0x213/0x8f0 kernel/softirq.c:561 __do_softirq kernel/softirq.c:595 [inline] invoke_softirq kernel/softirq.c:435 [inline] __irq_exit_rcu+0x109/0x170 kernel/softirq.c:662 irq_exit_rcu+0x9/0x30 kernel/softirq.c:678 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1049 [inline] sysvec_apic_timer_interrupt+0xa4/0xc0 arch/x86/kernel/apic/apic.c:1049 Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Reported-by: syzbot+47e761d22ecf745f72b9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6772c9ae.050a0220.2f3838.04c7.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Florian Westphal Cc: Tom Herbert Link: https://patch.msgid.link/20241230162849.2795486-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 17e8fa894345e8d2c7a7642482267b275c3d4553) --- net/ipv6/ila/ila_xlat.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 534a4498e280d..fff09f5a796a7 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -200,6 +200,8 @@ static const struct nf_hook_ops ila_nf_hook_ops[] = { }, }; +static DEFINE_MUTEX(ila_mutex); + static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); @@ -207,16 +209,20 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = 0, order; - if (!ilan->xlat.hooks_registered) { + if (!READ_ONCE(ilan->xlat.hooks_registered)) { /* We defer registering net hooks in the namespace until the * first mapping is added. 
*/ - err = nf_register_net_hooks(net, ila_nf_hook_ops, - ARRAY_SIZE(ila_nf_hook_ops)); + mutex_lock(&ila_mutex); + if (!ilan->xlat.hooks_registered) { + err = nf_register_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); + if (!err) + WRITE_ONCE(ilan->xlat.hooks_registered, true); + } + mutex_unlock(&ila_mutex); if (err) return err; - - ilan->xlat.hooks_registered = true; } ila = kzalloc(sizeof(*ila), GFP_KERNEL); From d3ce92d965c86c46758125ff6951d67a5ab096d2 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Mon, 23 Dec 2024 20:45:50 +0530 Subject: [PATCH 166/216] net: ti: icssg-prueth: Fix clearing of IEP_CMP_CFG registers during iep_init [ Upstream commit 9b115361248dc6cce182a2dc030c1c70b0a9639e ] When ICSSG interfaces are brought down and brought up again, the pru cores are shut down and booted again, flushing out all the memories and start again in a clean state. Hence it is expected that the IEP_CMP_CFG register needs to be flushed during iep_init() to ensure that the existing residual configuration doesn't cause any unusual behavior. If the register is not cleared, existing IEP_CMP_CFG set for CMP1 will result in SYNC0_OUT signal based on the SYNC_OUT register values. After bringing the interface up, calling PPS enable doesn't work as the driver believes PPS is already enabled, (iep->pps_enabled is not cleared during interface bring down) and driver will just return true even though there is no signal. Fix this by disabling pps and perout. Fixes: c1e0230eeaab ("net: ti: icss-iep: Add IEP driver") Signed-off-by: Meghana Malladi Reviewed-by: Roger Quadros Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin (cherry picked from commit d6b130fabfe197935346fe9f1e50a0947b2b1be7) --- drivers/net/ethernet/ti/icssg/icss_iep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 3025e9c189702..f06cdec14ed7a 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -290,6 +290,9 @@ static void icss_iep_enable_shadow_mode(struct icss_iep *iep) for (cmp = IEP_MIN_CMP; cmp < IEP_MAX_CMP; cmp++) { regmap_update_bits(iep->map, ICSS_IEP_CMP_STAT_REG, IEP_CMP_STATUS(cmp), IEP_CMP_STATUS(cmp)); + + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(cmp), 0); } /* enable reset counter on CMP0 event */ @@ -808,6 +811,11 @@ int icss_iep_exit(struct icss_iep *iep) } icss_iep_disable(iep); + if (iep->pps_enabled) + icss_iep_pps_enable(iep, false); + else if (iep->perout_enabled) + icss_iep_perout_enable(iep, NULL, false); + return 0; } EXPORT_SYMBOL_GPL(icss_iep_exit); From 86439bfb1536184e8677f13429810ae66aa98ef0 Mon Sep 17 00:00:00 2001 From: Issam Hamdi Date: Mon, 25 Nov 2024 17:29:20 +0100 Subject: [PATCH 167/216] wifi: mac80211: fix mbss changed flags corruption on 32 bit systems [ Upstream commit 49dba1ded8dd5a6a12748631403240b2ab245c34 ] On 32-bit systems, the size of an unsigned long is 4 bytes, while a u64 is 8 bytes. Therefore, when using for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE), the code is incorrectly searching for a bit in a 32-bit variable that is expected to be 64 bits in size, leading to incorrect bit finding. Solution: Ensure that the size of the bits variable is correctly adjusted for each architecture. Call Trace: ? show_regs+0x54/0x58 ? __warn+0x6b/0xd4 ? ieee80211_link_info_change_notify+0xcc/0xd4 [mac80211] ? report_bug+0x113/0x150 ? exc_overflow+0x30/0x30 ? handle_bug+0x27/0x44 ? exc_invalid_op+0x18/0x50 ? handle_exception+0xf6/0xf6 ? 
exc_overflow+0x30/0x30 ? ieee80211_link_info_change_notify+0xcc/0xd4 [mac80211] ? exc_overflow+0x30/0x30 ? ieee80211_link_info_change_notify+0xcc/0xd4 [mac80211] ? ieee80211_mesh_work+0xff/0x260 [mac80211] ? cfg80211_wiphy_work+0x72/0x98 [cfg80211] ? process_one_work+0xf1/0x1fc ? worker_thread+0x2c0/0x3b4 ? kthread+0xc7/0xf0 ? mod_delayed_work_on+0x4c/0x4c ? kthread_complete_and_exit+0x14/0x14 ? ret_from_fork+0x24/0x38 ? kthread_complete_and_exit+0x14/0x14 ? ret_from_fork_asm+0xf/0x14 ? entry_INT80_32+0xf0/0xf0 Signed-off-by: Issam Hamdi Link: https://patch.msgid.link/20241125162920.2711462-1-ih@simonwunderlich.de [restore no-op path for no changes] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin (cherry picked from commit 86772872f9f5097cd03d0e1c6813238bd38c250b) --- net/mac80211/mesh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 25223184d6e5b..a5e7edd2f2d13 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1173,14 +1173,14 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - unsigned long bits = changed; + unsigned long bits[] = { BITMAP_FROM_U64(changed) }; u32 bit; - if (!bits) + if (!changed) return; /* if we race with running work, worst case this work becomes a noop */ - for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE) + for_each_set_bit(bit, bits, sizeof(changed) * BITS_PER_BYTE) set_bit(bit, ifmsh->mbss_changed); set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); From f1c084253d1c47322c87f83282c2a1bb41361ab0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 19 Nov 2024 17:35:39 +0200 Subject: [PATCH 168/216] wifi: mac80211: wake the queues in case of failure in resume [ Upstream commit 220bf000530f9b1114fa2a1022a871c7ce8a0b38 ] In case we fail to resume, we'll WARN with "Hardware became 
unavailable during restart." and we'll wait until user space does something. It'll typically bring the interface down and up to recover. This won't work though because the queues are still stopped on IEEE80211_QUEUE_STOP_REASON_SUSPEND reason. Make sure we clear that reason so that we give a chance to the recovery to succeed. Signed-off-by: Emmanuel Grumbach Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219447 Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241119173108.cd628f560f97.I76a15fdb92de450e5329940125f3c58916be3942@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin (cherry picked from commit 037ea0f28f9acfa2cecd8aec0e77097afb26e25d) --- net/mac80211/util.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index cc3c46a820773..154b41af4157d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2586,6 +2586,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n"); else WARN(1, "Hardware became unavailable during restart.\n"); + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_SUSPEND, + false); ieee80211_handle_reconfig_failure(local); return res; } From e531c5741b6ea5b20717446737943e0e5f922867 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 5 Nov 2024 09:57:42 +0800 Subject: [PATCH 169/216] drm/amdkfd: Correct the migration DMA map direction [ Upstream commit 5c3de6b02d38eb9386edf50490e050bb44398e40 ] The SVM DMA device map direction should be set the same as the DMA unmap setting, otherwise the DMA core will report the following warning. Before finalizing this solution, there was some discussion about the DMA mapping type (stream-based or coherent) for this KFD migration case; see https://lore.kernel.org/all/04d4ab32-45a1-4b88-86ee-fb0f35a0ca40@amd.com/T/. 
As there's no dma_sync_single_for_*() in the DMA buffer accessed that because this migration operation should be sync properly and automatically. Give that there's might not be a performance problem in various cache sync policy of DMA sync. Therefore, in order to simplify the DMA direction setting alignment, let's set the DMA map direction as BIDIRECTIONAL. [ 150.834218] WARNING: CPU: 8 PID: 1812 at kernel/dma/debug.c:1028 check_unmap+0x1cc/0x930 [ 150.834225] Modules linked in: amdgpu(OE) amdxcp drm_exec(OE) gpu_sched drm_buddy(OE) drm_ttm_helper(OE) ttm(OE) drm_suballoc_helper(OE) drm_display_helper(OE) drm_kms_helper(OE) i2c_algo_bit rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace netfs xt_conntrack xt_MASQUERADE nf_conntrack_netlink xfrm_user xfrm_algo iptable_nat xt_addrtype iptable_filter br_netfilter nvme_fabrics overlay nfnetlink_cttimeout nfnetlink openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c bridge stp llc sch_fq_codel intel_rapl_msr amd_atl intel_rapl_common snd_hda_codec_realtek snd_hda_codec_generic snd_hda_scodec_component snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg edac_mce_amd snd_pci_acp6x snd_hda_codec snd_acp_config snd_hda_core snd_hwdep snd_soc_acpi kvm_amd sunrpc snd_pcm kvm binfmt_misc snd_seq_midi crct10dif_pclmul snd_seq_midi_event ghash_clmulni_intel sha512_ssse3 snd_rawmidi nls_iso8859_1 sha256_ssse3 sha1_ssse3 snd_seq aesni_intel snd_seq_device crypto_simd snd_timer cryptd input_leds [ 150.834310] wmi_bmof serio_raw k10temp rapl snd sp5100_tco ipmi_devintf soundcore ccp ipmi_msghandler cm32181 industrialio mac_hid msr parport_pc ppdev lp parport efi_pstore drm(OE) ip_tables x_tables pci_stub crc32_pclmul nvme ahci libahci i2c_piix4 r8169 nvme_core i2c_designware_pci realtek i2c_ccgx_ucsi video wmi hid_generic cdc_ether usbnet usbhid hid r8152 mii [ 150.834354] CPU: 8 PID: 1812 Comm: rocrtst64 Tainted: G OE 6.10.0-custom #492 [ 150.834358] Hardware name: AMD Majolica-RN/Majolica-RN, BIOS 
RMJ1009A 06/13/2021 [ 150.834360] RIP: 0010:check_unmap+0x1cc/0x930 [ 150.834363] Code: c0 4c 89 4d c8 e8 34 bf 86 00 4c 8b 4d c8 4c 8b 45 c0 48 8b 4d b8 48 89 c6 41 57 4c 89 ea 48 c7 c7 80 49 b4 84 e8 b4 81 f3 ff <0f> 0b 48 c7 c7 04 83 ac 84 e8 76 ba fc ff 41 8b 76 4c 49 8d 7e 50 [ 150.834365] RSP: 0018:ffffaac5023739e0 EFLAGS: 00010086 [ 150.834368] RAX: 0000000000000000 RBX: ffffffff8566a2e0 RCX: 0000000000000027 [ 150.834370] RDX: ffff8f6a8f621688 RSI: 0000000000000001 RDI: ffff8f6a8f621680 [ 150.834372] RBP: ffffaac502373a30 R08: 00000000000000c9 R09: ffffaac502373850 [ 150.834373] R10: ffffaac502373848 R11: ffffffff84f46328 R12: ffffaac502373a40 [ 150.834375] R13: ffff8f6741045330 R14: ffff8f6741a77700 R15: ffffffff84ac831b [ 150.834377] FS: 00007faf0fc94c00(0000) GS:ffff8f6a8f600000(0000) knlGS:0000000000000000 [ 150.834379] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 150.834381] CR2: 00007faf0b600020 CR3: 000000010a52e000 CR4: 0000000000350ef0 [ 150.834383] Call Trace: [ 150.834385] [ 150.834387] ? show_regs+0x6d/0x80 [ 150.834393] ? __warn+0x8c/0x140 [ 150.834397] ? check_unmap+0x1cc/0x930 [ 150.834400] ? report_bug+0x193/0x1a0 [ 150.834406] ? handle_bug+0x46/0x80 [ 150.834410] ? exc_invalid_op+0x1d/0x80 [ 150.834413] ? asm_exc_invalid_op+0x1f/0x30 [ 150.834420] ? check_unmap+0x1cc/0x930 [ 150.834425] debug_dma_unmap_page+0x86/0x90 [ 150.834431] ? srso_return_thunk+0x5/0x5f [ 150.834435] ? rmap_walk+0x28/0x50 [ 150.834438] ? srso_return_thunk+0x5/0x5f [ 150.834441] ? remove_migration_ptes+0x79/0x80 [ 150.834445] ? 
srso_return_thunk+0x5/0x5f [ 150.834448] dma_unmap_page_attrs+0xfa/0x1d0 [ 150.834453] svm_range_dma_unmap_dev+0x8a/0xf0 [amdgpu] [ 150.834710] svm_migrate_ram_to_vram+0x361/0x740 [amdgpu] [ 150.834914] svm_migrate_to_vram+0xa8/0xe0 [amdgpu] [ 150.835111] svm_range_set_attr+0xff2/0x1450 [amdgpu] [ 150.835311] svm_ioctl+0x4a/0x50 [amdgpu] [ 150.835510] kfd_ioctl_svm+0x54/0x90 [amdgpu] [ 150.835701] kfd_ioctl+0x3c2/0x530 [amdgpu] [ 150.835888] ? __pfx_kfd_ioctl_svm+0x10/0x10 [amdgpu] [ 150.836075] ? srso_return_thunk+0x5/0x5f [ 150.836080] ? tomoyo_file_ioctl+0x20/0x30 [ 150.836086] __x64_sys_ioctl+0x9c/0xd0 [ 150.836091] x64_sys_call+0x1219/0x20d0 [ 150.836095] do_syscall_64+0x51/0x120 [ 150.836098] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 150.836102] RIP: 0033:0x7faf0f11a94f [ 150.836105] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 [ 150.836107] RSP: 002b:00007ffeced26bc0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 150.836110] RAX: ffffffffffffffda RBX: 000055c683528fb0 RCX: 00007faf0f11a94f [ 150.836112] RDX: 00007ffeced26c60 RSI: 00000000c0484b20 RDI: 0000000000000003 [ 150.836114] RBP: 00007ffeced26c50 R08: 0000000000000000 R09: 0000000000000001 [ 150.836115] R10: 0000000000000032 R11: 0000000000000246 R12: 000055c683528bd0 [ 150.836117] R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000000 [ 150.836122] [ 150.836124] ---[ end trace 0000000000000000 ]--- Signed-off-by: Prike Liang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin (cherry picked from commit d0fafe701c6aca785cc8685f9f76fdc73e662f47) --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 3263b5fa182d2..f99e3b812ee44 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -319,7 +319,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, spage = migrate_pfn_to_page(migrate->src[i]); if (spage && !is_zone_device_page(spage)) { src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, - DMA_TO_DEVICE); + DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, src[i]); if (r) { dev_err(dev, "%s: fail %d dma_map_page\n", @@ -634,7 +634,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, goto out_oom; } - dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, dst[i]); if (r) { dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r); From 481bb15ed7e649db1777da25fa1e9ffd653eff45 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 3 Dec 2024 11:53:27 +0000 Subject: [PATCH 170/216] btrfs: flush delalloc workers queue before stopping cleaner kthread during unmount [ Upstream commit f10bef73fb355e3fc85e63a50386798be68ff486 ] During the unmount path, at close_ctree(), we first stop the cleaner kthread, using kthread_stop() which frees the associated task_struct, and then stop and destroy all the work queues. However after we stopped the cleaner we may still have a worker from the delalloc_workers queue running inode.c:submit_compressed_extents(), which calls btrfs_add_delayed_iput(), which in turn tries to wake up the cleaner kthread - which was already destroyed before, resulting in a use-after-free on the task_struct. 
Syzbot reported this with the following stack traces: BUG: KASAN: slab-use-after-free in __lock_acquire+0x78/0x2100 kernel/locking/lockdep.c:5089 Read of size 8 at addr ffff8880259d2818 by task kworker/u8:3/52 CPU: 1 UID: 0 PID: 52 Comm: kworker/u8:3 Not tainted 6.13.0-rc1-syzkaller-00002-gcdd30ebb1b9f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: btrfs-delalloc btrfs_work_helper Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x169/0x550 mm/kasan/report.c:489 kasan_report+0x143/0x180 mm/kasan/report.c:602 __lock_acquire+0x78/0x2100 kernel/locking/lockdep.c:5089 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 class_raw_spinlock_irqsave_constructor include/linux/spinlock.h:551 [inline] try_to_wake_up+0xc2/0x1470 kernel/sched/core.c:4205 submit_compressed_extents+0xdf/0x16e0 fs/btrfs/inode.c:1615 run_ordered_work fs/btrfs/async-thread.c:288 [inline] btrfs_work_helper+0x96f/0xc40 fs/btrfs/async-thread.c:324 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa66/0x1840 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Allocated by task 2: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 unpoison_slab_object mm/kasan/common.c:319 [inline] __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:345 kasan_slab_alloc include/linux/kasan.h:250 [inline] slab_post_alloc_hook mm/slub.c:4104 [inline] slab_alloc_node mm/slub.c:4153 [inline] kmem_cache_alloc_node_noprof+0x1d9/0x380 mm/slub.c:4205 alloc_task_struct_node 
kernel/fork.c:180 [inline] dup_task_struct+0x57/0x8c0 kernel/fork.c:1113 copy_process+0x5d1/0x3d50 kernel/fork.c:2225 kernel_clone+0x223/0x870 kernel/fork.c:2807 kernel_thread+0x1bc/0x240 kernel/fork.c:2869 create_kthread kernel/kthread.c:412 [inline] kthreadd+0x60d/0x810 kernel/kthread.c:767 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Freed by task 24: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:582 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x59/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2338 [inline] slab_free mm/slub.c:4598 [inline] kmem_cache_free+0x195/0x410 mm/slub.c:4700 put_task_struct include/linux/sched/task.h:144 [inline] delayed_put_task_struct+0x125/0x300 kernel/exit.c:227 rcu_do_batch kernel/rcu/tree.c:2567 [inline] rcu_core+0xaaa/0x17a0 kernel/rcu/tree.c:2823 handle_softirqs+0x2d4/0x9b0 kernel/softirq.c:554 run_ksoftirqd+0xca/0x130 kernel/softirq.c:943 smpboot_thread_fn+0x544/0xa30 kernel/smpboot.c:164 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xac/0xc0 mm/kasan/generic.c:544 __call_rcu_common kernel/rcu/tree.c:3086 [inline] call_rcu+0x167/0xa70 kernel/rcu/tree.c:3190 context_switch kernel/sched/core.c:5372 [inline] __schedule+0x1803/0x4be0 kernel/sched/core.c:6756 __schedule_loop kernel/sched/core.c:6833 [inline] schedule+0x14b/0x320 kernel/sched/core.c:6848 schedule_timeout+0xb0/0x290 kernel/time/sleep_timeout.c:75 do_wait_for_common kernel/sched/completion.c:95 [inline] __wait_for_common kernel/sched/completion.c:116 [inline] wait_for_common kernel/sched/completion.c:127 
[inline] wait_for_completion+0x355/0x620 kernel/sched/completion.c:148 kthread_stop+0x19e/0x640 kernel/kthread.c:712 close_ctree+0x524/0xd60 fs/btrfs/disk-io.c:4328 generic_shutdown_super+0x139/0x2d0 fs/super.c:642 kill_anon_super+0x3b/0x70 fs/super.c:1237 btrfs_kill_super+0x41/0x50 fs/btrfs/super.c:2112 deactivate_locked_super+0xc4/0x130 fs/super.c:473 cleanup_mnt+0x41f/0x4b0 fs/namespace.c:1373 task_work_run+0x24f/0x310 kernel/task_work.c:239 ptrace_notify+0x2d2/0x380 kernel/signal.c:2503 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc7/0x1d0 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x24a/0x340 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f The buggy address belongs to the object at ffff8880259d1e00 which belongs to the cache task_struct of size 7424 The buggy address is located 2584 bytes inside of freed 7424-byte region [ffff8880259d1e00, ffff8880259d3b00) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x259d0 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 memcg:ffff88802f4b56c1 flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff) page_type: f5(slab) raw: 00fff00000000040 ffff88801bafe500 dead000000000100 dead000000000122 raw: 0000000000000000 0000000000040004 00000001f5000000 ffff88802f4b56c1 head: 00fff00000000040 ffff88801bafe500 dead000000000100 dead000000000122 head: 0000000000000000 0000000000040004 00000001f5000000 ffff88802f4b56c1 head: 00fff00000000003 ffffea0000967401 ffffffffffffffff 0000000000000000 head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the 
page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 12, tgid 12 (kworker/u8:1), ts 7328037942, free_ts 0 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1556 prep_new_page mm/page_alloc.c:1564 [inline] get_page_from_freelist+0x3651/0x37a0 mm/page_alloc.c:3474 __alloc_pages_noprof+0x292/0x710 mm/page_alloc.c:4751 alloc_pages_mpol_noprof+0x3e8/0x680 mm/mempolicy.c:2265 alloc_slab_page+0x6a/0x140 mm/slub.c:2408 allocate_slab+0x5a/0x2f0 mm/slub.c:2574 new_slab mm/slub.c:2627 [inline] ___slab_alloc+0xcd1/0x14b0 mm/slub.c:3815 __slab_alloc+0x58/0xa0 mm/slub.c:3905 __slab_alloc_node mm/slub.c:3980 [inline] slab_alloc_node mm/slub.c:4141 [inline] kmem_cache_alloc_node_noprof+0x269/0x380 mm/slub.c:4205 alloc_task_struct_node kernel/fork.c:180 [inline] dup_task_struct+0x57/0x8c0 kernel/fork.c:1113 copy_process+0x5d1/0x3d50 kernel/fork.c:2225 kernel_clone+0x223/0x870 kernel/fork.c:2807 user_mode_thread+0x132/0x1a0 kernel/fork.c:2885 call_usermodehelper_exec_work+0x5c/0x230 kernel/umh.c:171 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa66/0x1840 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 page_owner free stack trace missing Memory state around the buggy address: ffff8880259d2700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880259d2780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8880259d2800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880259d2880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880259d2900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fix this by flushing the delalloc workers queue before stopping the cleaner kthread. 
Reported-by: syzbot+b7cf50a0c173770dcb14@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/674ed7e8.050a0220.48a03.0031.GAE@google.com/ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin (cherry picked from commit 35916b2f96505a18dc7242a115611b718d9de725) --- fs/btrfs/disk-io.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8ec411eb9c9b0..967c6b5dd0a43 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4323,6 +4323,15 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) * already the cleaner, but below we run all pending delayed iputs. */ btrfs_flush_workqueue(fs_info->fixup_workers); + /* + * Similar case here, we have to wait for delalloc workers before we + * proceed below and stop the cleaner kthread, otherwise we trigger a + * use-after-free on the cleaner kthread task_struct when a delalloc + * worker running submit_compressed_extents() adds a delayed iput, which + * does a wake up on the cleaner kthread, which was already freed below + * when we call kthread_stop(). + */ + btrfs_flush_workqueue(fs_info->delalloc_workers); /* * After we parked the cleaner kthread, ordered extents may have From 766a133b2d7bf09566c9ef553ac55f29ece084dc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 7 Dec 2024 14:37:53 +0100 Subject: [PATCH 171/216] ALSA: hda/ca0132: Use standard HD-audio quirk matching helpers [ Upstream commit 7c005292e20ac53dfa601bf2a7375fd4815511ad ] CA0132 used the PCI SSID lookup helper that doesn't support the model string matching or quirk aliasing. Replace it with the standard HD-audio quirk helpers for supporting those, and add the definition of the model strings for supported quirks, too. There should be no visible change to the outside for the working system, but the driver will parse the model option and apply the quirk based on it from now on. 
Link: https://patch.msgid.link/20241207133754.3658-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit 0d5e2d476000cfc79d137ec28e7410a0b1979e02) --- sound/pci/hda/patch_ca0132.c | 37 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 748a3c40966e9..27e48fdbbf3aa 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1134,7 +1134,6 @@ struct ca0132_spec { struct hda_codec *codec; struct delayed_work unsol_hp_work; - int quirk; #ifdef ENABLE_TUNING_CONTROLS long cur_ctl_vals[TUNING_CTLS_COUNT]; @@ -1166,7 +1165,6 @@ struct ca0132_spec { * CA0132 quirks table */ enum { - QUIRK_NONE, QUIRK_ALIENWARE, QUIRK_ALIENWARE_M17XR4, QUIRK_SBZ, @@ -1176,10 +1174,11 @@ enum { QUIRK_R3D, QUIRK_AE5, QUIRK_AE7, + QUIRK_NONE = HDA_FIXUP_ID_NOT_SET, }; #ifdef CONFIG_PCI -#define ca0132_quirk(spec) ((spec)->quirk) +#define ca0132_quirk(spec) ((spec)->codec->fixup_id) #define ca0132_use_pci_mmio(spec) ((spec)->use_pci_mmio) #define ca0132_use_alt_functions(spec) ((spec)->use_alt_functions) #define ca0132_use_alt_controls(spec) ((spec)->use_alt_controls) @@ -1293,7 +1292,7 @@ static const struct hda_pintbl ae7_pincfgs[] = { {} }; -static const struct snd_pci_quirk ca0132_quirks[] = { +static const struct hda_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1028, 0x057b, "Alienware M17x R4", QUIRK_ALIENWARE_M17XR4), SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE), @@ -1316,6 +1315,19 @@ static const struct snd_pci_quirk ca0132_quirks[] = { {} }; +static const struct hda_model_fixup ca0132_quirk_models[] = { + { .id = QUIRK_ALIENWARE, .name = "alienware" }, + { .id = QUIRK_ALIENWARE_M17XR4, .name = "alienware-m17xr4" }, + { .id = QUIRK_SBZ, .name = "sbz" }, + { .id = QUIRK_ZXR, .name = "zxr" }, + { .id = QUIRK_ZXR_DBPRO, 
.name = "zxr-dbpro" }, + { .id = QUIRK_R3DI, .name = "r3di" }, + { .id = QUIRK_R3D, .name = "r3d" }, + { .id = QUIRK_AE5, .name = "ae5" }, + { .id = QUIRK_AE7, .name = "ae7" }, + {} +}; + /* Output selection quirk info structures. */ #define MAX_QUIRK_MMIO_GPIO_SET_VALS 3 #define MAX_QUIRK_SCP_SET_VALS 2 @@ -9962,17 +9974,15 @@ static int ca0132_prepare_verbs(struct hda_codec *codec) */ static void sbz_detect_quirk(struct hda_codec *codec) { - struct ca0132_spec *spec = codec->spec; - switch (codec->core.subsystem_id) { case 0x11020033: - spec->quirk = QUIRK_ZXR; + codec->fixup_id = QUIRK_ZXR; break; case 0x1102003f: - spec->quirk = QUIRK_ZXR_DBPRO; + codec->fixup_id = QUIRK_ZXR_DBPRO; break; default: - spec->quirk = QUIRK_SBZ; + codec->fixup_id = QUIRK_SBZ; break; } } @@ -9981,7 +9991,6 @@ static int patch_ca0132(struct hda_codec *codec) { struct ca0132_spec *spec; int err; - const struct snd_pci_quirk *quirk; codec_dbg(codec, "patch_ca0132\n"); @@ -9992,11 +10001,7 @@ static int patch_ca0132(struct hda_codec *codec) spec->codec = codec; /* Detect codec quirk */ - quirk = snd_pci_quirk_lookup(codec->bus->pci, ca0132_quirks); - if (quirk) - spec->quirk = quirk->value; - else - spec->quirk = QUIRK_NONE; + snd_hda_pick_fixup(codec, ca0132_quirk_models, ca0132_quirks, NULL); if (ca0132_quirk(spec) == QUIRK_SBZ) sbz_detect_quirk(codec); @@ -10073,7 +10078,7 @@ static int patch_ca0132(struct hda_codec *codec) spec->mem_base = pci_iomap(codec->bus->pci, 2, 0xC20); if (spec->mem_base == NULL) { codec_warn(codec, "pci_iomap failed! 
Setting quirk to QUIRK_NONE."); - spec->quirk = QUIRK_NONE; + codec->fixup_id = QUIRK_NONE; } } #endif From 03739901415bfecb26d95603ce8203c7e60885d3 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Sat, 7 Dec 2024 23:18:36 +0300 Subject: [PATCH 172/216] ALSA: hda/realtek: Add new alc2xx-fixup-headset-mic model [ Upstream commit 50db91fccea0da5c669bc68e2429e8de303758d3 ] Introduces the alc2xx-fixup-headset-mic model to simplify enabling headset microphones on ALC2XX codecs. Many recent configurations, as well as older systems that lacked this fix for a long time, leave headset microphones inactive by default. This addition provides a flexible workaround using the existing ALC2XX_FIXUP_HEADSET_MIC quirk. Signed-off-by: Vasiliy Kovalev Link: https://patch.msgid.link/20241207201836.6879-1-kovalev@altlinux.org Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit 7523dd63ab22129b1acbcbadef56fc7894eb68ed) --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 29d7eb8c6bec3..031cfc4744c01 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10631,6 +10631,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, {.id = ALC236_FIXUP_LENOVO_INV_DMIC, .name = "alc236-fixup-lenovo-inv-mic"}, + {.id = ALC2XX_FIXUP_HEADSET_MIC, .name = "alc2xx-fixup-headset-mic"}, {} }; #define ALC225_STANDARD_PINS \ From 77f6af13c11e5b66f495f7a513eff7d0b58a23c9 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 9 Dec 2024 11:05:28 +0200 Subject: [PATCH 173/216] sound: usb: enable DSD output for ddHiFi TC44C [ Upstream commit c84bd6c810d1880194fea2229c7086e4b73fddc1 ] This is a UAC 2 DAC capable of raw DSD on intf 2 alt 4: Bus 007 Device 004: ID 262a:9302 SAVITECH Corp. 
TC44C Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 239 Miscellaneous Device bDeviceSubClass 2 [unknown] bDeviceProtocol 1 Interface Association bMaxPacketSize0 64 idVendor 0x262a SAVITECH Corp. idProduct 0x9302 TC44C bcdDevice 0.01 iManufacturer 1 DDHIFI iProduct 2 TC44C iSerial 6 5000000001 ....... Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 4 bNumEndpoints 2 bInterfaceClass 1 Audio bInterfaceSubClass 2 Streaming bInterfaceProtocol 32 iInterface 0 AudioStreaming Interface Descriptor: bLength 16 bDescriptorType 36 bDescriptorSubtype 1 (AS_GENERAL) bTerminalLink 3 bmControls 0x00 bFormatType 1 bmFormats 0x80000000 bNrChannels 2 bmChannelConfig 0x00000000 iChannelNames 0 ....... Signed-off-by: Adrian Ratiu Link: https://patch.msgid.link/20241209090529.16134-1-adrian.ratiu@collabora.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit 325370be06761d3d71df082933aae87f3a4104f1) --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 8eed8d9742fda..ec81b47c41c9e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2225,6 +2225,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */ QUIRK_FLAG_SET_IFACE_FIRST), + DEVICE_FLG(0x262a, 0x9302, /* ddHiFi TC44C */ + QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ From 4df8393e7ef2a5e635e554f5d187745801ce977c Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Mon, 9 Dec 2024 11:05:29 +0200 Subject: [PATCH 174/216] sound: usb: format: don't warn that raw DSD is unsupported [ Upstream commit b50a3e98442b8d72f061617c7f7a71f7dba19484 ] UAC 2 & 3 DAC's set bit 31 of the format to signal support for a RAW_DATA type, typically used for DSD playback. 
This is correctly tested by (format & UAC*_FORMAT_TYPE_I_RAW_DATA), fp->dsd_raw = true; and call snd_usb_interface_dsd_format_quirks(), however a confusing and unnecessary message gets printed because the bit is not properly tested in the last "unsupported" if test: if (format & ~0x3F) { ... } For example the output: usb 7-1: new high-speed USB device number 5 using xhci_hcd usb 7-1: New USB device found, idVendor=262a, idProduct=9302, bcdDevice=0.01 usb 7-1: New USB device strings: Mfr=1, Product=2, SerialNumber=6 usb 7-1: Product: TC44C usb 7-1: Manufacturer: TC44C usb 7-1: SerialNumber: 5000000001 hid-generic 0003:262A:9302.001E: No inputs registered, leaving hid-generic 0003:262A:9302.001E: hidraw6: USB HID v1.00 Device [DDHIFI TC44C] on usb-0000:08:00.3-1/input0 usb 7-1: 2:4 : unsupported format bits 0x100000000 This last "unsupported format" is actually wrong: we know the format is a RAW_DATA which we assume is DSD, so there is no need to print the confusing message. Thus we unset bit 31 of the format after recognizing it, to avoid the message. 
Suggested-by: Takashi Iwai Signed-off-by: Adrian Ratiu Link: https://patch.msgid.link/20241209090529.16134-2-adrian.ratiu@collabora.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin (cherry picked from commit 73a30cb3e9802ef4c6eb36ae0ab63c04017f7a69) --- sound/usb/format.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 3b45d0ee76938..3b3a5ea6fcbfc 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -60,6 +60,8 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, pcm_formats |= SNDRV_PCM_FMTBIT_SPECIAL; /* flag potentially raw DSD capable altsettings */ fp->dsd_raw = true; + /* clear special format bit to avoid "unsupported format" msg below */ + format &= ~UAC2_FORMAT_TYPE_I_RAW_DATA; } format <<= 1; @@ -71,8 +73,11 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, sample_width = as->bBitResolution; sample_bytes = as->bSubslotSize; - if (format & UAC3_FORMAT_TYPE_I_RAW_DATA) + if (format & UAC3_FORMAT_TYPE_I_RAW_DATA) { pcm_formats |= SNDRV_PCM_FMTBIT_SPECIAL; + /* clear special format bit to avoid "unsupported format" msg below */ + format &= ~UAC3_FORMAT_TYPE_I_RAW_DATA; + } format <<= 1; break; From 1ae666c5903e308be060160c62b0d541a084999a Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Tue, 10 Dec 2024 11:42:45 +0000 Subject: [PATCH 175/216] bpf: fix potential error return [ Upstream commit c4441ca86afe4814039ee1b32c39d833c1a16bbc ] The bpf_remove_insns() function returns WARN_ON_ONCE(error), where error is a result of bpf_adj_branches(), and thus should be always 0 However, if for any reason it is not 0, then it will be converted to boolean by WARN_ON_ONCE and returned to user space as 1, not an actual error value. Fix this by returning the original err after the WARN check. 
Signed-off-by: Anton Protopopov Acked-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20241210114245.836164-1-aspsk@isovalent.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin (cherry picked from commit f53b37313ab630fff194652475a1f6e8d6b32078) --- kernel/bpf/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 58ee17f429a33..02f327f05fd61 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -529,6 +529,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) { + int err; + /* Branch offsets can't overflow when program is shrinking, no need * to call bpf_adj_branches(..., true) here */ @@ -536,7 +538,9 @@ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) sizeof(struct bpf_insn) * (prog->len - off - cnt)); prog->len -= cnt; - return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); + err = bpf_adj_branches(prog, off, off + cnt, off, false); + WARN_ON_ONCE(err); + return err; } static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) From f88e2bf06641821c1f24fc379a1f8f9f6135ce5a Mon Sep 17 00:00:00 2001 From: Hobin Woo Date: Thu, 5 Dec 2024 11:31:19 +0900 Subject: [PATCH 176/216] ksmbd: retry iterate_dir in smb2_query_dir [ Upstream commit 2b904d61a97e8ba79e3bc216ba290fd7e1d85028 ] Some file systems do not ensure that the single call of iterate_dir reaches the end of the directory. For example, FUSE fetches entries from a daemon using 4KB buffer and stops fetching if entries exceed the buffer. And then an actor of caller, KSMBD, is used to fill the entries from the buffer. Thus, pattern searching on FUSE, files located after the 4KB could not be found and STATUS_NO_SUCH_FILE was returned. 
Signed-off-by: Hobin Woo Reviewed-by: Sungjong Seo Reviewed-by: Namjae Jeon Tested-by: Yoonho Shin Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin (cherry picked from commit 2f75da8294bf1aa69cce19a9de578d4ee06212e3) --- fs/smb/server/smb2pdu.c | 12 +++++++++++- fs/smb/server/vfs.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index cd530b9a00caa..7216e2cc498b7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4225,6 +4225,7 @@ static bool __query_dir(struct dir_context *ctx, const char *name, int namlen, /* dot and dotdot entries are already reserved */ if (!strcmp(".", name) || !strcmp("..", name)) return true; + d_info->num_scan++; if (ksmbd_share_veto_filename(priv->work->tcon->share_conf, name)) return true; if (!match_pattern(name, namlen, priv->search_pattern)) @@ -4385,8 +4386,17 @@ int smb2_query_dir(struct ksmbd_work *work) query_dir_private.info_level = req->FileInformationClass; dir_fp->readdir_data.private = &query_dir_private; set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir); - +again: + d_info.num_scan = 0; rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx); + /* + * num_entry can be 0 if the directory iteration stops before reaching + * the end of the directory and no file is matched with the search + * pattern. + */ + if (rc >= 0 && !d_info.num_entry && d_info.num_scan && + d_info.out_buf_len > 0) + goto again; /* * req->OutputBufferLength is too small to contain even one entry. * In this case, it immediately returns OutputBufferLength 0 to client. 
diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index cb76f4b5bafe8..06903024a2d88 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -43,6 +43,7 @@ struct ksmbd_dir_info { char *rptr; int name_len; int out_buf_len; + int num_scan; int num_entry; int data_count; int last_entry_offset; From d478a2c312c0fd29c5de79d5a7639d08f23a2f54 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 6 Dec 2024 17:25:25 +0900 Subject: [PATCH 177/216] ksmbd: set ATTR_CTIME flags when setting mtime [ Upstream commit 21e46a79bbe6c4e1aa73b3ed998130f2ff07b128 ] David reported that the new warning from setattr_copy_mgtime is coming like the following. [ 113.215316] ------------[ cut here ]------------ [ 113.215974] WARNING: CPU: 1 PID: 31 at fs/attr.c:300 setattr_copy+0x1ee/0x200 [ 113.219192] CPU: 1 UID: 0 PID: 31 Comm: kworker/1:1 Not tainted 6.13.0-rc1+ #234 [ 113.220127] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 [ 113.221530] Workqueue: ksmbd-io handle_ksmbd_work [ksmbd] [ 113.222220] RIP: 0010:setattr_copy+0x1ee/0x200 [ 113.222833] Code: 24 28 49 8b 44 24 30 48 89 53 58 89 43 6c 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 48 89 df e8 77 d6 ff ff e9 cd fe ff ff <0f> 0b e9 be fe ff ff 66 0 [ 113.225110] RSP: 0018:ffffaf218010fb68 EFLAGS: 00010202 [ 113.225765] RAX: 0000000000000120 RBX: ffffa446815f8568 RCX: 0000000000000003 [ 113.226667] RDX: ffffaf218010fd38 RSI: ffffa446815f8568 RDI: ffffffff94eb03a0 [ 113.227531] RBP: ffffaf218010fb90 R08: 0000001a251e217d R09: 00000000675259fa [ 113.228426] R10: 0000000002ba8a6d R11: ffffa4468196c7a8 R12: ffffaf218010fd38 [ 113.229304] R13: 0000000000000120 R14: ffffffff94eb03a0 R15: 0000000000000000 [ 113.230210] FS: 0000000000000000(0000) GS:ffffa44739d00000(0000) knlGS:0000000000000000 [ 113.231215] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 113.232055] CR2: 00007efe0053d27e CR3: 000000000331a000 CR4: 00000000000006b0 [ 113.232926] DR0: 
0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 113.233812] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 113.234797] Call Trace: [ 113.235116] [ 113.235393] ? __warn+0x73/0xd0 [ 113.235802] ? setattr_copy+0x1ee/0x200 [ 113.236299] ? report_bug+0xf3/0x1e0 [ 113.236757] ? handle_bug+0x4d/0x90 [ 113.237202] ? exc_invalid_op+0x13/0x60 [ 113.237689] ? asm_exc_invalid_op+0x16/0x20 [ 113.238185] ? setattr_copy+0x1ee/0x200 [ 113.238692] btrfs_setattr+0x80/0x820 [btrfs] [ 113.239285] ? get_stack_info_noinstr+0x12/0xf0 [ 113.239857] ? __module_address+0x22/0xa0 [ 113.240368] ? handle_ksmbd_work+0x6e/0x460 [ksmbd] [ 113.240993] ? __module_text_address+0x9/0x50 [ 113.241545] ? __module_address+0x22/0xa0 [ 113.242033] ? unwind_next_frame+0x10e/0x920 [ 113.242600] ? __pfx_stack_trace_consume_entry+0x10/0x10 [ 113.243268] notify_change+0x2c2/0x4e0 [ 113.243746] ? stack_depot_save_flags+0x27/0x730 [ 113.244339] ? set_file_basic_info+0x130/0x2b0 [ksmbd] [ 113.244993] set_file_basic_info+0x130/0x2b0 [ksmbd] [ 113.245613] ? process_scheduled_works+0xbe/0x310 [ 113.246181] ? worker_thread+0x100/0x240 [ 113.246696] ? kthread+0xc8/0x100 [ 113.247126] ? ret_from_fork+0x2b/0x40 [ 113.247606] ? ret_from_fork_asm+0x1a/0x30 [ 113.248132] smb2_set_info+0x63f/0xa70 [ksmbd] ksmbd is trying to set the atime and mtime via notify_change without also setting the ctime. so This patch add ATTR_CTIME flags when setting mtime to avoid a warning. 
Reported-by: David Disseldorp Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin (cherry picked from commit 1d7ee876b8b96efc14e177a7fe8d45ac25d68849) --- fs/smb/server/smb2pdu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 7216e2cc498b7..2884ebdc0eda0 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6017,15 +6017,13 @@ static int set_file_basic_info(struct ksmbd_file *fp, attrs.ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); } - attrs.ia_valid |= ATTR_CTIME; if (file_info->ChangeTime) - attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime); - else - attrs.ia_ctime = inode_get_ctime(inode); + inode_set_ctime_to_ts(inode, + ksmbd_NTtimeToUnix(file_info->ChangeTime)); if (file_info->LastWriteTime) { attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime); - attrs.ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); + attrs.ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET | ATTR_CTIME); } if (file_info->Attributes) { @@ -6067,8 +6065,6 @@ static int set_file_basic_info(struct ksmbd_file *fp, return -EACCES; inode_lock(inode); - inode_set_ctime_to_ts(inode, attrs.ia_ctime); - attrs.ia_valid &= ~ATTR_CTIME; rc = notify_change(idmap, dentry, &attrs, NULL); inode_unlock(inode); } From 876497997613f31eb3e6e35fdbb5d2ef2e0b5741 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Tue, 10 Dec 2024 10:21:48 -0300 Subject: [PATCH 178/216] smb: client: destroy cfid_put_wq on module exit [ Upstream commit 633609c48a358134d3f8ef8241dff24841577f58 ] Fix potential problem in rmmod Signed-off-by: Enzo Matsumiya Signed-off-by: Steve French Signed-off-by: Sasha Levin (cherry picked from commit c6b1d01e7a9cc47a25d3798fba3837724d71192d) --- fs/smb/client/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 6ed0f2548232f..bbb0ef18d7b8c 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ 
-2015,6 +2015,7 @@ exit_cifs(void) destroy_workqueue(decrypt_wq); destroy_workqueue(fileinfo_put_wq); destroy_workqueue(serverclose_wq); + destroy_workqueue(cfid_put_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } From 369fbe2bb8aeaefbb84a391a6cc834004a79a6d7 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Mon, 9 Dec 2024 16:18:21 +0100 Subject: [PATCH 179/216] net: usb: qmi_wwan: add Telit FE910C04 compositions [ Upstream commit 3b58b53a26598209a7ad8259a5114ce71f7c3d64 ] Add the following Telit FE910C04 compositions: 0x10c0: rmnet + tty (AT/NMEA) + tty (AT) + tty (diag) T: Bus=02 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 13 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10c0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE910 S: SerialNumber=f71b8b32 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10c4: rmnet + tty (AT) + tty (AT) + tty (diag) T: Bus=02 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 14 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10c4 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE910 S: SerialNumber=f71b8b32 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10c8: rmnet + tty (AT) + tty (diag) + DPL (data packet logging) + adb T: Bus=02 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 17 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10c8 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FE910 S: SerialNumber=f71b8b32 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) 
Sub=42 Prot=01 Driver=(none) E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniele Palmas Link: https://patch.msgid.link/20241209151821.3688829-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit d8ecb248c199cde2eb9dce864e16b7f31b3a6173) --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 89775b6d0699a..8e30df676eded 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1373,6 +1373,9 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a9, 0)}, /* Telit FN920C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c0, 0)}, /* Telit FE910C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c4, 0)}, /* Telit FE910C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c8, 0)}, /* Telit FE910C04 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ From c3814ebf1977295880f4956be6f852eed00dfd37 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 3 Dec 2024 16:07:32 -0500 Subject: [PATCH 180/216] Bluetooth: hci_core: Fix sleeping function called from invalid context [ Upstream commit 4d94f05558271654670d18c26c912da0c1c15549 ] This reworks hci_cb_list to not use mutex hci_cb_list_lock to avoid bugs like the bellow: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 5070, name: kworker/u9:2 preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 4 locks held by kworker/u9:2/5070: #0: ffff888015be3948 ((wq_completion)hci0#2){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3229 [inline] #0: ffff888015be3948 
((wq_completion)hci0#2){+.+.}-{0:0}, at: process_scheduled_works+0x8e0/0x1770 kernel/workqueue.c:3335 #1: ffffc90003b6fd00 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3230 [inline] #1: ffffc90003b6fd00 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}, at: process_scheduled_works+0x91b/0x1770 kernel/workqueue.c:3335 #2: ffff8880665d0078 (&hdev->lock){+.+.}-{3:3}, at: hci_le_create_big_complete_evt+0xcf/0xae0 net/bluetooth/hci_event.c:6914 #3: ffffffff8e132020 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline] #3: ffffffff8e132020 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline] #3: ffffffff8e132020 (rcu_read_lock){....}-{1:2}, at: hci_le_create_big_complete_evt+0xdb/0xae0 net/bluetooth/hci_event.c:6915 CPU: 0 PID: 5070 Comm: kworker/u9:2 Not tainted 6.8.0-syzkaller-08073-g480e035fc4c7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Workqueue: hci0 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 __might_resched+0x5d4/0x780 kernel/sched/core.c:10187 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0xc1/0xd70 kernel/locking/mutex.c:752 hci_connect_cfm include/net/bluetooth/hci_core.h:2004 [inline] hci_le_create_big_complete_evt+0x3d9/0xae0 net/bluetooth/hci_event.c:6939 hci_event_func net/bluetooth/hci_event.c:7514 [inline] hci_event_packet+0xa53/0x1540 net/bluetooth/hci_event.c:7569 hci_rx_work+0x3e8/0xca0 net/bluetooth/hci_core.c:4171 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa00/0x1770 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 Reported-by: syzbot+2fb0835e0c9cefc34614@syzkaller.appspotmail.com Tested-by: 
syzbot+2fb0835e0c9cefc34614@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2fb0835e0c9cefc34614 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin (cherry picked from commit bef333418368c58690b501894324c09124e4614f) --- include/net/bluetooth/hci_core.h | 108 ++++++++++++++++++++----------- net/bluetooth/hci_core.c | 10 +-- net/bluetooth/iso.c | 6 ++ net/bluetooth/l2cap_core.c | 12 ++-- net/bluetooth/rfcomm/core.c | 6 ++ net/bluetooth/sco.c | 12 ++-- 6 files changed, 97 insertions(+), 57 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1fc8e843c1619..2373ea839b229 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -801,7 +801,6 @@ struct hci_conn_params { extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; -extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) @@ -1950,24 +1949,47 @@ struct hci_cb { char *name; + bool (*match) (struct hci_conn *conn); void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, - __u8 encrypt); + __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; +static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list) +{ + struct hci_cb *cb, *cpy; + + rcu_read_lock(); + list_for_each_entry_rcu(cb, &hci_cb_list, list) { + if (cb->match && cb->match(conn)) { + cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC); + if (!cpy) + break; + + *cpy = *cb; + INIT_LIST_HEAD(&cpy->list); + list_add_rcu(&cpy->list, list); + } + } + rcu_read_unlock(); +} + static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { - 
struct hci_cb *cb; + struct list_head list; + struct hci_cb *cb, *tmp; + + INIT_LIST_HEAD(&list); + hci_cb_lookup(conn, &list); - mutex_lock(&hci_cb_list_lock); - list_for_each_entry(cb, &hci_cb_list, list) { + list_for_each_entry_safe(cb, tmp, &list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); + kfree(cb); } - mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); @@ -1975,43 +1997,55 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { - struct hci_cb *cb; + struct list_head list; + struct hci_cb *cb, *tmp; + + INIT_LIST_HEAD(&list); + hci_cb_lookup(conn, &list); - mutex_lock(&hci_cb_list_lock); - list_for_each_entry(cb, &hci_cb_list, list) { + list_for_each_entry_safe(cb, tmp, &list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); + kfree(cb); } - mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } -static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) +static inline void hci_security_cfm(struct hci_conn *conn, __u8 status, + __u8 encrypt) { - struct hci_cb *cb; - __u8 encrypt; - - if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) - return; + struct list_head list; + struct hci_cb *cb, *tmp; - encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; + INIT_LIST_HEAD(&list); + hci_cb_lookup(conn, &list); - mutex_lock(&hci_cb_list_lock); - list_for_each_entry(cb, &hci_cb_list, list) { + list_for_each_entry_safe(cb, tmp, &list, list) { if (cb->security_cfm) cb->security_cfm(conn, status, encrypt); + kfree(cb); } - mutex_unlock(&hci_cb_list_lock); if (conn->security_cfm_cb) conn->security_cfm_cb(conn, status); } +static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) +{ + __u8 encrypt; + + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) + return; + + encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 
0x01 : 0x00; + + hci_security_cfm(conn, status, encrypt); +} + static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { - struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { @@ -2038,40 +2072,38 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) conn->sec_level = conn->pending_sec_level; } - mutex_lock(&hci_cb_list_lock); - list_for_each_entry(cb, &hci_cb_list, list) { - if (cb->security_cfm) - cb->security_cfm(conn, status, encrypt); - } - mutex_unlock(&hci_cb_list_lock); - - if (conn->security_cfm_cb) - conn->security_cfm_cb(conn, status); + hci_security_cfm(conn, status, encrypt); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { - struct hci_cb *cb; + struct list_head list; + struct hci_cb *cb, *tmp; + + INIT_LIST_HEAD(&list); + hci_cb_lookup(conn, &list); - mutex_lock(&hci_cb_list_lock); - list_for_each_entry(cb, &hci_cb_list, list) { + list_for_each_entry_safe(cb, tmp, &list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); + kfree(cb); } - mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { - struct hci_cb *cb; + struct list_head list; + struct hci_cb *cb, *tmp; + + INIT_LIST_HEAD(&list); + hci_cb_lookup(conn, &list); - mutex_lock(&hci_cb_list_lock); - list_for_each_entry(cb, &hci_cb_list, list) { + list_for_each_entry_safe(cb, tmp, &list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); + kfree(cb); } - mutex_unlock(&hci_cb_list_lock); } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 076e4e7061c99..948bf344cb9a8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -58,7 +58,6 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); -DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -2973,9 
+2972,7 @@ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - mutex_lock(&hci_cb_list_lock); - list_add_tail(&cb->list, &hci_cb_list); - mutex_unlock(&hci_cb_list_lock); + list_add_tail_rcu(&cb->list, &hci_cb_list); return 0; } @@ -2985,9 +2982,8 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - mutex_lock(&hci_cb_list_lock); - list_del(&cb->list); - mutex_unlock(&hci_cb_list_lock); + list_del_rcu(&cb->list); + synchronize_rcu(); return 0; } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index b94d202bf3745..f165cafa3aa98 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1929,6 +1929,11 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } +static bool iso_match(struct hci_conn *hcon) +{ + return hcon->type == ISO_LINK || hcon->type == LE_LINK; +} + static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { if (hcon->type != ISO_LINK) { @@ -2110,6 +2115,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) static struct hci_cb iso_cb = { .name = "ISO", + .match = iso_match, .connect_cfm = iso_connect_cfm, .disconn_cfm = iso_disconn_cfm, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 93651c421767a..acb148759bd04 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7223,6 +7223,11 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } +static bool l2cap_match(struct hci_conn *hcon) +{ + return hcon->type == ACL_LINK || hcon->type == LE_LINK; +} + static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; @@ -7230,9 +7235,6 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) struct l2cap_chan *pchan; u8 dst_type; - if (hcon->type != ACL_LINK && hcon->type != LE_LINK) - return; - BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -7297,9 +7299,6 @@ 
int l2cap_disconn_ind(struct hci_conn *hcon) static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { - if (hcon->type != ACL_LINK && hcon->type != LE_LINK) - return; - BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -7578,6 +7577,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) static struct hci_cb l2cap_cb = { .name = "L2CAP", + .match = l2cap_match, .connect_cfm = l2cap_connect_cfm, .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1d34d84970332..9d46afb24caf0 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2134,6 +2134,11 @@ static int rfcomm_run(void *unused) return 0; } +static bool rfcomm_match(struct hci_conn *hcon) +{ + return hcon->type == ACL_LINK; +} + static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; @@ -2180,6 +2185,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) static struct hci_cb rfcomm_cb = { .name = "RFCOMM", + .match = rfcomm_match, .security_cfm = rfcomm_security_cfm }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 64d4d57c7033a..c4c36ff25fb20 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1353,11 +1353,13 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) +static bool sco_match(struct hci_conn *hcon) { - if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) - return; + return hcon->type == SCO_LINK || hcon->type == ESCO_LINK; +} +static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) +{ BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status); if (!status) { @@ -1372,9 +1374,6 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) static void sco_disconn_cfm(struct hci_conn *hcon, 
__u8 reason) { - if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) - return; - BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1400,6 +1399,7 @@ void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) static struct hci_cb sco_cb = { .name = "SCO", + .match = sco_match, .connect_cfm = sco_connect_cfm, .disconn_cfm = sco_disconn_cfm, }; From 026e4039ae94f5f7b7cc0cecf2b09e98c954fbff Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 13 Dec 2024 15:57:53 +0100 Subject: [PATCH 181/216] irqchip/gic: Correct declaration of *percpu_base pointer in union gic_base [ Upstream commit a1855f1b7c33642c9f7a01991fb763342a312e9b ] percpu_base is used in various percpu functions that expect variable in __percpu address space. Correct the declaration of percpu_base to void __iomem * __percpu *percpu_base; to declare the variable as __percpu pointer. The patch fixes several sparse warnings: irq-gic.c:1172:44: warning: incorrect type in assignment (different address spaces) irq-gic.c:1172:44: expected void [noderef] __percpu *[noderef] __iomem *percpu_base irq-gic.c:1172:44: got void [noderef] __iomem *[noderef] __percpu * ... irq-gic.c:1231:43: warning: incorrect type in argument 1 (different address spaces) irq-gic.c:1231:43: expected void [noderef] __percpu *__pdata irq-gic.c:1231:43: got void [noderef] __percpu *[noderef] __iomem *percpu_base There were no changes in the resulting object files. 
Signed-off-by: Uros Bizjak Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/all/20241213145809.2918-2-ubizjak@gmail.com Signed-off-by: Sasha Levin (cherry picked from commit d8f3f7d30f65d514c75b25feaeb83c3fefcf7c39) --- drivers/irqchip/irq-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 412196a7dad58..2c6c50348afd1 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -64,7 +64,7 @@ static void gic_check_cpu_features(void) union gic_base { void __iomem *common_base; - void __percpu * __iomem *percpu_base; + void __iomem * __percpu *percpu_base; }; struct gic_chip_data { From 2776461fe0c51292605c6a4fde16ad4c86ac2c24 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 3 Dec 2024 14:37:15 +0200 Subject: [PATCH 182/216] ARC: build: Try to guess GCC variant of cross compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 824927e88456331c7a999fdf5d9d27923b619590 ] The ARC GCC compiler is packaged starting from Fedora 39 [1], and the GCC variant of the cross-compile tools has the arc-linux-gnu- prefix rather than arc-linux-. As a result, the CROSS_COMPILE variable is left unset. This change allows builds without the need to supply the CROSS_COMPILE argument if the distro package is used.
Before this change: $ make -j 128 ARCH=arc W=1 drivers/infiniband/hw/mlx4/ gcc: warning: ‘-mcpu=’ is deprecated; use ‘-mtune=’ or ‘-march=’ instead gcc: error: unrecognized command-line option ‘-mmedium-calls’ gcc: error: unrecognized command-line option ‘-mlock’ gcc: error: unrecognized command-line option ‘-munaligned-access’ [1] https://packages.fedoraproject.org/pkgs/cross-gcc/gcc-arc-linux-gnu/index.html Signed-off-by: Leon Romanovsky Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin (cherry picked from commit f2b94ee08ec66c5b1cb13e6289ee3bbef35c1cdb) --- arch/arc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 2390dd042e363..fb98478ed1ab0 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,7 +6,7 @@ KBUILD_DEFCONFIG := haps_hs_smp_defconfig ifeq ($(CROSS_COMPILE),) -CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux- arc-linux-gnu-) endif cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ From 22716e583b8a1efbbeaa8c783a9234a47d36236c Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 16 Jan 2024 08:09:25 -0600 Subject: [PATCH 183/216] seq_buf: Make DECLARE_SEQ_BUF() usable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7a8e9cdf9405819105ae7405cd91e482bf574b01 ] Using the address operator on the array doesn't work: ./include/linux/seq_buf.h:27:27: error: initialization of ‘char *’ from incompatible pointer type ‘char (*)[128]’ [-Werror=incompatible-pointer-types] 27 | .buffer = &__ ## NAME ## _buffer, \ | ^ Apart from fixing that, we can improve DECLARE_SEQ_BUF() by using a compound literal to define the buffer array without attaching a name to it. This makes the macro a single statement, allowing constructs such as: static DECLARE_SEQ_BUF(my_seq_buf, MYSB_SIZE); to work as intended. 
Link: https://lkml.kernel.org/r/20240116-declare-seq-buf-fix-v1-1-915db4692f32@linux.ibm.com Cc: stable@vger.kernel.org Acked-by: Kees Cook Fixes: dcc4e5728eea ("seq_buf: Introduce DECLARE_SEQ_BUF and seq_buf_str()") Signed-off-by: Nathan Lynch Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin (cherry picked from commit c3b5a7d6a13baa7e5d6deadb929da20809b345e8) --- include/linux/seq_buf.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index d9db59f420a49..468d8c5eef4a0 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -22,9 +22,8 @@ struct seq_buf { }; #define DECLARE_SEQ_BUF(NAME, SIZE) \ - char __ ## NAME ## _buffer[SIZE] = ""; \ struct seq_buf NAME = { \ - .buffer = &__ ## NAME ## _buffer, \ + .buffer = (char[SIZE]) { 0 }, \ .size = SIZE, \ } From 75aec180a94b55b12c0df67a29b0c8f6070fcbfd Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 4 Sep 2024 03:04:13 -0700 Subject: [PATCH 184/216] RDMA/bnxt_re: Fix the max WQE size for static WQE support [ Upstream commit 227f51743b61fe3f6fc481f0fb8086bf8c49b8c9 ] When variable size WQE is supported, max_qp_sges reported is more than 6. For devices that support variable size WQE, the Send WQE size calculation is wrong when an older library that doesn't support variable size WQE is used. Set the WQE size to 128 when static WQE is supported.
Fixes: de1d364c3815 ("RDMA/bnxt_re: Add support for Variable WQE in Genp7 adapters") Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1725444253-13221-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit 36e1b6890f228ccfc867031ecedffe50958b25e4) --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 21 ++++++++++----------- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2 ++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 540998ddbb445..13c65ec582568 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -992,23 +992,22 @@ static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, align = sizeof(struct sq_send_hdr); ilsize = ALIGN(init_attr->cap.max_inline_data, align); - sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); - if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) - return -EINVAL; - /* For gen p4 and gen p5 backward compatibility mode - * wqe size is fixed to 128 bytes + /* For gen p4 and gen p5 fixed wqe compatibility mode + * wqe size is fixed to 128 bytes - ie 6 SGEs */ - if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) && - qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) - sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges); + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) { + sq->wqe_size = bnxt_re_get_swqe_size(BNXT_STATIC_MAX_SGE); + sq->max_sge = BNXT_STATIC_MAX_SGE; + } else { + sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); + if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) + return -EINVAL; + } if (init_attr->cap.max_inline_data) { qplqp->max_inline_data = sq->wqe_size - sizeof(struct sq_send_hdr); init_attr->cap.max_inline_data = qplqp->max_inline_data; - if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) - sq->max_sge = qplqp->max_inline_data / 
- sizeof(struct sq_sge); } return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index b91e6a85e75d9..aeacd0a9a92cc 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -358,4 +358,6 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, #define BNXT_VAR_MAX_SGE 13 #define BNXT_RE_MAX_RQ_WQES 65536 +#define BNXT_STATIC_MAX_SGE 6 + #endif /* __BNXT_QPLIB_SP_H__*/ From d1b097055bcb5e82267b32b58d4be4c1fcd334ff Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 3 Nov 2024 21:52:57 +0900 Subject: [PATCH 185/216] modpost: fix input MODULE_DEVICE_TABLE() built for 64-bit on 32-bit host [ Upstream commit 77dc55a978e69625f9718460012e5ef0172dc4de ] When building a 64-bit kernel on a 32-bit build host, incorrect input MODULE_ALIAS() entries may be generated. For example, when compiling a 64-bit kernel with CONFIG_INPUT_MOUSEDEV=m on a 64-bit build machine, you will get the correct output: $ grep MODULE_ALIAS drivers/input/mousedev.mod.c MODULE_ALIAS("input:b*v*p*e*-e*1,*2,*k*110,*r*0,*1,*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*2,*k*r*8,*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*3,*k*14A,*r*a*0,*1,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*3,*k*145,*r*a*0,*1,*18,*1C,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*3,*k*110,*r*a*0,*1,*m*l*s*f*w*"); However, building the same kernel on a 32-bit machine results in incorrect output: $ grep MODULE_ALIAS drivers/input/mousedev.mod.c MODULE_ALIAS("input:b*v*p*e*-e*1,*2,*k*110,*130,*r*0,*1,*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*2,*k*r*8,*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*3,*k*14A,*16A,*r*a*0,*1,*20,*21,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*3,*k*145,*165,*r*a*0,*1,*18,*1C,*20,*21,*38,*3C,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*3,*k*110,*130,*r*a*0,*1,*20,*21,*m*l*s*f*w*"); A similar issue occurs with CONFIG_INPUT_JOYDEV=m. 
On a 64-bit build machine, the output is: $ grep MODULE_ALIAS drivers/input/joydev.mod.c MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*0,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*2,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*8,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*6,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*k*120,*r*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*k*130,*r*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*k*2C0,*r*a*m*l*s*f*w*"); However, on a 32-bit machine, the output is incorrect: $ grep MODULE_ALIAS drivers/input/joydev.mod.c MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*0,*20,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*2,*22,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*8,*28,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*3,*k*r*a*6,*26,*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*k*11F,*13F,*r*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*k*11F,*13F,*r*a*m*l*s*f*w*"); MODULE_ALIAS("input:b*v*p*e*-e*1,*k*2C0,*2E0,*r*a*m*l*s*f*w*"); When building a 64-bit kernel, BITS_PER_LONG is defined as 64. However, on a 32-bit build machine, the constant 1L is a signed 32-bit value. Left-shifting it beyond 32 bits causes wraparound, and shifting by 31 or 63 bits makes it a negative value. The fix in commit e0e92632715f ("[PATCH] PATCH: 1 line 2.6.18 bugfix: modpost-64bit-fix.patch") is incorrect; it only addresses cases where a 64-bit kernel is built on a 64-bit build machine, overlooking cases on a 32-bit build machine. Using 1ULL ensures a 64-bit width on both 32-bit and 64-bit machines, avoiding the wraparound issue. 
Fixes: e0e92632715f ("[PATCH] PATCH: 1 line 2.6.18 bugfix: modpost-64bit-fix.patch") Signed-off-by: Masahiro Yamada Stable-dep-of: bf36b4bf1b9a ("modpost: fix the missed iteration for the max bit in do_input()") Signed-off-by: Sasha Levin (cherry picked from commit f93e9ae0ba5ebfb11466ce5370be9e9a85bc093d) --- scripts/mod/file2alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index efbb4836ec668..96f37fe1b9924 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -743,7 +743,7 @@ static void do_input(char *alias, for (i = min / BITS_PER_LONG; i < max / BITS_PER_LONG + 1; i++) arr[i] = TO_NATIVE(arr[i]); for (i = min; i < max; i++) - if (arr[i / BITS_PER_LONG] & (1L << (i%BITS_PER_LONG))) + if (arr[i / BITS_PER_LONG] & (1ULL << (i%BITS_PER_LONG))) sprintf(alias + strlen(alias), "%X,*", i); } From d1bd1ec9a1ab8e3ee604acf07b63c6a6fd5aa094 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 26 Dec 2024 00:33:35 +0900 Subject: [PATCH 186/216] modpost: fix the missed iteration for the max bit in do_input() [ Upstream commit bf36b4bf1b9a7a0015610e2f038ee84ddb085de2 ] This loop should iterate over the range from 'min' to 'max' inclusively. The last iteration is missed.
Fixes: 1d8f430c15b3 ("[PATCH] Input: add modalias support") Signed-off-by: Masahiro Yamada Tested-by: John Paul Adrian Glaubitz Signed-off-by: Sasha Levin (cherry picked from commit bc6962f2dbaf1676c8cbb8b04522f26a186bf416) --- scripts/mod/file2alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 96f37fe1b9924..ea498eff1f2ae 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -742,7 +742,7 @@ static void do_input(char *alias, for (i = min / BITS_PER_LONG; i < max / BITS_PER_LONG + 1; i++) arr[i] = TO_NATIVE(arr[i]); - for (i = min; i < max; i++) + for (i = min; i <= max; i++) if (arr[i / BITS_PER_LONG] & (1ULL << (i%BITS_PER_LONG))) sprintf(alias + strlen(alias), "%X,*", i); } From 2c8a90065925fd57b43335d1b79e9fca0ba3a9a5 Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Sun, 5 Jan 2025 14:27:43 +0800 Subject: [PATCH 187/216] Revert "bpf: support non-r10 register spill/fill to/from stack in precision tracking" Revert commit ecc2aeeaa08a355d84d3ca9c3d2512399a194f29 which is commit 41f6f64e6999a837048b1bd13a2f8742964eca6b upstream. Levi reported that commit ecc2aeeaa08a ("bpf: support non-r10 register spill/fill to/from stack in precision tracking") causes an eBPF program that previously loaded successfully in stable 6.6 to now fail to load, even though the same program loads successfully in v6.13-rc5. Revert ecc2aeeaa08a until the problem has been properly figured out and resolved.
Fixes: ecc2aeeaa08a ("bpf: support non-r10 register spill/fill to/from stack in precision tracking") Reported-by: Levi Zim Link: https://lore.kernel.org/stable/MEYP282MB2312C3C8801476C4F262D6E1C6162@MEYP282MB2312.AUSP282.PROD.OUTLOOK.COM/ Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 199f0452873741fa4b8d4d88958e929030b2f92b) --- include/linux/bpf_verifier.h | 31 +--- kernel/bpf/verifier.c | 175 ++++++++---------- .../bpf/progs/verifier_subprog_precision.c | 23 +-- .../testing/selftests/bpf/verifier/precise.c | 38 ++-- 4 files changed, 98 insertions(+), 169 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index cb8e97665eaa5..92919d52f7e1b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -319,34 +319,12 @@ struct bpf_func_state { struct bpf_stack_state *stack; }; -#define MAX_CALL_FRAMES 8 - -/* instruction history flags, used in bpf_jmp_history_entry.flags field */ -enum { - /* instruction references stack slot through PTR_TO_STACK register; - * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) - * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512, - * 8 bytes per slot, so slot index (spi) is [0, 63]) - */ - INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */ - - INSN_F_SPI_MASK = 0x3f, /* 6 bits */ - INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ - - INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ -}; - -static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); -static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); - -struct bpf_jmp_history_entry { +struct bpf_idx_pair { + u32 prev_idx; u32 idx; - /* insn idx can't be bigger than 1 million */ - u32 prev_idx : 22; - /* special flags, e.g., whether insn is doing register stack spill/load */ - u32 flags : 10; }; +#define MAX_CALL_FRAMES 8 /* Maximum number of register states that can exist at once */ #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / 
BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { @@ -429,7 +407,7 @@ struct bpf_verifier_state { * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40. */ - struct bpf_jmp_history_entry *jmp_history; + struct bpf_idx_pair *jmp_history; u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; @@ -662,7 +640,6 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; - struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a3c3c66ca0475..d6a4102312fad 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1762,8 +1762,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, int i, err; dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, - src->jmp_history_cnt, sizeof(*dst_state->jmp_history), - GFP_USER); + src->jmp_history_cnt, sizeof(struct bpf_idx_pair), + GFP_USER); if (!dst_state->jmp_history) return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt; @@ -3397,21 +3397,6 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return __check_reg_arg(env, state->regs, regno, t); } -static int insn_stack_access_flags(int frameno, int spi) -{ - return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno; -} - -static int insn_stack_access_spi(int insn_flags) -{ - return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK; -} - -static int insn_stack_access_frameno(int insn_flags) -{ - return insn_flags & INSN_F_FRAMENO_MASK; -} - static void mark_jmp_point(struct bpf_verifier_env *env, int idx) { env->insn_aux_data[idx].jmp_point = true; @@ -3423,51 +3408,28 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx) } /* for any branch, call, exit record the history of jmps in the given state */ -static int push_jmp_history(struct 
bpf_verifier_env *env, struct bpf_verifier_state *cur, - int insn_flags) +static int push_jmp_history(struct bpf_verifier_env *env, + struct bpf_verifier_state *cur) { u32 cnt = cur->jmp_history_cnt; - struct bpf_jmp_history_entry *p; + struct bpf_idx_pair *p; size_t alloc_size; - /* combine instruction flags if we already recorded this instruction */ - if (env->cur_hist_ent) { - /* atomic instructions push insn_flags twice, for READ and - * WRITE sides, but they should agree on stack slot - */ - WARN_ONCE((env->cur_hist_ent->flags & insn_flags) && - (env->cur_hist_ent->flags & insn_flags) != insn_flags, - "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n", - env->insn_idx, env->cur_hist_ent->flags, insn_flags); - env->cur_hist_ent->flags |= insn_flags; + if (!is_jmp_point(env, env->insn_idx)) return 0; - } cnt++; alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); p = krealloc(cur->jmp_history, alloc_size, GFP_USER); if (!p) return -ENOMEM; + p[cnt - 1].idx = env->insn_idx; + p[cnt - 1].prev_idx = env->prev_insn_idx; cur->jmp_history = p; - - p = &cur->jmp_history[cnt - 1]; - p->idx = env->insn_idx; - p->prev_idx = env->prev_insn_idx; - p->flags = insn_flags; cur->jmp_history_cnt = cnt; - env->cur_hist_ent = p; - return 0; } -static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st, - u32 hist_end, int insn_idx) -{ - if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx) - return &st->jmp_history[hist_end - 1]; - return NULL; -} - /* Backtrack one insn at a time. If idx is not at the top of recorded * history then previous instruction came from straight line execution. * Return -ENOENT if we exhausted all instructions within given state. 
@@ -3629,14 +3591,9 @@ static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) return bt->reg_masks[bt->frame] & (1 << reg); } -static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot) -{ - return bt->stack_masks[frame] & (1ull << slot); -} - static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot) { - return bt_is_frame_slot_set(bt, bt->frame, slot); + return bt->stack_masks[bt->frame] & (1ull << slot); } /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */ @@ -3690,7 +3647,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); * - *was* processed previously during backtracking. */ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, - struct bpf_jmp_history_entry *hist, struct backtrack_state *bt) + struct backtrack_state *bt) { const struct bpf_insn_cbs cbs = { .cb_call = disasm_kfunc_name, @@ -3703,7 +3660,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, u8 mode = BPF_MODE(insn->code); u32 dreg = insn->dst_reg; u32 sreg = insn->src_reg; - u32 spi, i, fr; + u32 spi, i; if (insn->code == 0) return 0; @@ -3766,15 +3723,20 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * by 'precise' mark in corresponding register of this state. * No further tracking necessary. */ - if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) + if (insn->src_reg != BPF_REG_FP) return 0; + /* dreg = *(u64 *)[fp - off] was a fill from the stack. 
* that [fp - off] slot contains scalar that needs to be * tracked with precision */ - spi = insn_stack_access_spi(hist->flags); - fr = insn_stack_access_frameno(hist->flags); - bt_set_frame_slot(bt, fr, spi); + spi = (-insn->off - 1) / BPF_REG_SIZE; + if (spi >= 64) { + verbose(env, "BUG spi %d\n", spi); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + bt_set_slot(bt, spi); } else if (class == BPF_STX || class == BPF_ST) { if (bt_is_reg_set(bt, dreg)) /* stx & st shouldn't be using _scalar_ dst_reg @@ -3783,13 +3745,17 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, */ return -ENOTSUPP; /* scalars can only be spilled into stack */ - if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) + if (insn->dst_reg != BPF_REG_FP) return 0; - spi = insn_stack_access_spi(hist->flags); - fr = insn_stack_access_frameno(hist->flags); - if (!bt_is_frame_slot_set(bt, fr, spi)) + spi = (-insn->off - 1) / BPF_REG_SIZE; + if (spi >= 64) { + verbose(env, "BUG spi %d\n", spi); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + if (!bt_is_slot_set(bt, spi)) return 0; - bt_clear_frame_slot(bt, fr, spi); + bt_clear_slot(bt, spi); if (class == BPF_STX) bt_set_reg(bt, sreg); } else if (class == BPF_JMP || class == BPF_JMP32) { @@ -3833,14 +3799,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - /* we are now tracking register spills correctly, - * so any instance of leftover slots is a bug - */ - if (bt_stack_mask(bt) != 0) { - verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)"); - return -EFAULT; - } + /* we don't track register spills perfectly, + * so fallback to force-precise instead of failing */ + if (bt_stack_mask(bt) != 0) + return -ENOTSUPP; /* propagate r1-r5 to the caller */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) { if 
(bt_is_reg_set(bt, i)) { @@ -3865,11 +3827,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - if (bt_stack_mask(bt) != 0) { - verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); - WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)"); - return -EFAULT; - } + if (bt_stack_mask(bt) != 0) + return -ENOTSUPP; /* clear r1-r5 in callback subprog's mask */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) bt_clear_reg(bt, i); @@ -4306,7 +4265,6 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) for (;;) { DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; - struct bpf_jmp_history_entry *hist; if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", @@ -4370,8 +4328,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) err = 0; skip_first = false; } else { - hist = get_jmp_hist_entry(st, history, i); - err = backtrack_insn(env, i, subseq_idx, hist, bt); + err = backtrack_insn(env, i, subseq_idx, bt); } if (err == -ENOTSUPP) { mark_all_scalars_precise(env, env->cur_state); @@ -4424,10 +4381,22 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); for_each_set_bit(i, mask, 64) { if (i >= func->allocated_stack / BPF_REG_SIZE) { - verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n", - i, func->allocated_stack / BPF_REG_SIZE); - WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)"); - return -EFAULT; + /* the sequence of instructions: + * 2: (bf) r3 = r10 + * 3: (7b) *(u64 *)(r3 -8) = r0 + * 4: (79) r4 = *(u64 *)(r10 -8) + * doesn't contain jmps. It's backtracked + * as a single block. + * During backtracking insn 3 is not recognized as + * stack access, so at the end of backtracking + * stack slot fp-8 is still marked in stack_mask. 
+ * However the parent state may not have accessed + * fp-8 and it's "unallocated" stack space. + * In such case fallback to conservative. + */ + mark_all_scalars_precise(env, env->cur_state); + bt_reset(bt); + return 0; } if (!is_spilled_scalar_reg(&func->stack[i])) { @@ -4592,7 +4561,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; struct bpf_reg_state *reg = NULL; - int insn_flags = insn_stack_access_flags(state->frameno, spi); + u32 dst_reg = insn->dst_reg; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits @@ -4631,6 +4600,17 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { + if (dst_reg != BPF_REG_FP) { + /* The backtracking logic can only recognize explicit + * stack slot address like [fp - 8]. Other spill of + * scalar via different register has to be conservative. + * Backtrack from here and mark all registers as precise + * that contributed into 'reg' being a constant. + */ + err = mark_chain_precision(env, value_regno); + if (err) + return err; + } save_register_state(state, spi, reg, size); /* Break the relation on a narrowing spill. 
*/ if (fls64(reg->umax_value) > BITS_PER_BYTE * size) @@ -4642,7 +4622,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, __mark_reg_known(&fake_reg, insn->imm); fake_reg.type = SCALAR_VALUE; save_register_state(state, spi, &fake_reg, size); - insn_flags = 0; /* not a register spill */ } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -4688,12 +4667,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, /* Mark slots affected by this stack write. */ for (i = 0; i < size; i++) - state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type; - insn_flags = 0; /* not a register spill */ + state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = + type; } - - if (insn_flags) - return push_jmp_history(env, env->cur_state, insn_flags); return 0; } @@ -4882,7 +4858,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; struct bpf_reg_state *reg; u8 *stype, type; - int insn_flags = insn_stack_access_flags(reg_state->frameno, spi); stype = reg_state->stack[spi].slot_type; reg = ®_state->stack[spi].spilled_ptr; @@ -4928,10 +4903,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, return -EACCES; } mark_reg_unknown(env, state->regs, dst_regno); - insn_flags = 0; /* not restoring original register state */ } state->regs[dst_regno].live |= REG_LIVE_WRITTEN; - } else if (dst_regno >= 0) { + return 0; + } + + if (dst_regno >= 0) { /* restore register state from stack */ copy_register_state(&state->regs[dst_regno], reg); /* mark reg as written since spilled pointer state likely @@ -4967,10 +4944,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); if (dst_regno >= 0) mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); - insn_flags = 0; /* we are not restoring spilled 
register */ } - if (insn_flags) - return push_jmp_history(env, env->cur_state, insn_flags); return 0; } @@ -7054,6 +7028,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); if (err) return err; + return 0; } @@ -16802,8 +16777,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * the precision needs to be propagated back in * the current state. */ - if (is_jmp_point(env, env->insn_idx)) - err = err ? : push_jmp_history(env, cur, 0); + err = err ? : push_jmp_history(env, cur); err = err ? : propagate_precision(env, &sl->state); if (err) return err; @@ -17027,9 +17001,6 @@ static int do_check(struct bpf_verifier_env *env) u8 class; int err; - /* reset current history entry on each new instruction */ - env->cur_hist_ent = NULL; - env->prev_insn_idx = prev_insn_idx; if (env->insn_idx >= insn_cnt) { verbose(env, "invalid insn idx %d insn_cnt %d\n", @@ -17069,7 +17040,7 @@ static int do_check(struct bpf_verifier_env *env) } if (is_jmp_point(env, env->insn_idx)) { - err = push_jmp_history(env, state, 0); + err = push_jmp_history(env, state); if (err) return err; } diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index f87365f7599bf..f61d623b1ce8d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -541,24 +541,11 @@ static __u64 subprog_spill_reg_precise(void) SEC("?raw_tp") __success __log_level(2) -__msg("10: (0f) r1 += r7") -__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1") -__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8") -__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4") -__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)") -__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 
R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1") -__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7") -__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2") -__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)") -__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)") -__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2") -__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2") -__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6") -__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6") -__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8") -__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10") -__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") +/* precision backtracking can't currently handle stack access not through r10, + * so we won't be able to mark stack slot fp-8 as precise, and so will + * fallback to forcing all as precise + */ +__msg("mark_precise: frame0: falling back to forcing all scalars precise") __naked int subprog_spill_into_parent_stack_slot_precise(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 8a2ff81d83508..0d84dd1f38b6b 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -140,11 +140,10 @@ .result = REJECT, }, { - "precise: ST zero to stack insn is supported", + "precise: ST insn causing spi > allocated_stack", .insns = { BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), - /* not a register spill, so we stop precision propagation for R4 here */ BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), BPF_MOV64_IMM(BPF_REG_0, -1), 
@@ -158,11 +157,11 @@ mark_precise: frame0: last_idx 4 first_idx 2\ mark_precise: frame0: regs=r4 stack= before 4\ mark_precise: frame0: regs=r4 stack= before 3\ + mark_precise: frame0: regs= stack=-8 before 2\ + mark_precise: frame0: falling back to forcing all scalars precise\ + force_precise: frame0: forcing r0 to be precise\ mark_precise: frame0: last_idx 5 first_idx 5\ - mark_precise: frame0: parent state regs=r0 stack=:\ - mark_precise: frame0: last_idx 4 first_idx 2\ - mark_precise: frame0: regs=r0 stack= before 4\ - 5: R0=-1 R4=0", + mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -170,8 +169,6 @@ "precise: STX insn causing spi > allocated_stack", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), - /* make later reg spill more interesting by having somewhat known scalar */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), @@ -182,21 +179,18 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ + .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ mark_precise: frame0: parent state regs=r4 stack=:\ - mark_precise: frame0: last_idx 6 first_idx 4\ - mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ - mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ - mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ - mark_precise: frame0: parent state regs=r0 stack=:\ - mark_precise: frame0: last_idx 3 first_idx 3\ - mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\ - mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\ - mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\ - mark_precise: frame0: parent state regs=r0 stack=:\ - mark_precise: frame0: last_idx 0 first_idx 0\ - mark_precise: frame0: 
regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\ - mark_precise: frame0: last_idx 7 first_idx 7\ + mark_precise: frame0: last_idx 5 first_idx 3\ + mark_precise: frame0: regs=r4 stack= before 5\ + mark_precise: frame0: regs=r4 stack= before 4\ + mark_precise: frame0: regs= stack=-8 before 3\ + mark_precise: frame0: falling back to forcing all scalars precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + mark_precise: frame0: last_idx 6 first_idx 6\ mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, From fca65c0b21396c0d0fe7cb8f56e2543331f7f322 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 31 Dec 2024 15:53:58 +0100 Subject: [PATCH 188/216] ALSA: seq: Check UMP support for midi_version change commit 8765429279e7d3d68d39ace5f84af2815174bb1e upstream. When the kernel is built without UMP support but a user-space app requires the midi_version > 0, the kernel should return an error. Otherwise user-space assumes as if it were possible to deal, eventually hitting serious errors later. 
Fixes: 46397622a3fa ("ALSA: seq: Add UMP support") Cc: Link: https://patch.msgid.link/20241231145358.21946-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 396964d45ca5dc035f5247f90ec800016744661a) --- sound/core/seq/seq_clientmgr.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index e115fe1836349..8b7dfbc8e8207 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1280,10 +1280,16 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, if (client->type != client_info->type) return -EINVAL; - /* check validity of midi_version field */ - if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3) && - client_info->midi_version > SNDRV_SEQ_CLIENT_UMP_MIDI_2_0) - return -EINVAL; + if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3)) { + /* check validity of midi_version field */ + if (client_info->midi_version > SNDRV_SEQ_CLIENT_UMP_MIDI_2_0) + return -EINVAL; + + /* check if UMP is supported in kernel */ + if (!IS_ENABLED(CONFIG_SND_SEQ_UMP) && + client_info->midi_version > 0) + return -EINVAL; + } /* fill the info fields */ if (client_info->name[0]) From 6eeab1abbccf6fd2421280f70d1fab23f99dcfe2 Mon Sep 17 00:00:00 2001 From: Daniel Schaefer Date: Tue, 31 Dec 2024 12:59:58 +0800 Subject: [PATCH 189/216] ALSA hda/realtek: Add quirk for Framework F111:000C commit 7b509910b3ad6d7aacead24c8744de10daf8715d upstream. Similar to commit eb91c456f371 ("ALSA: hda/realtek: Add Framework Laptop 13 (Intel Core Ultra) to quirks") and previous quirks for Framework systems with Realtek codecs. 000C is a new platform that will also have an ALC285 codec and needs the same quirk. Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: linux@frame.work Cc: Dustin L. 
Howett Signed-off-by: Daniel Schaefer Cc: Link: https://patch.msgid.link/20241231045958.14545-1-dhs@frame.work Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 7d1f59defa9ec7f076c8da5d660dcc2a6be81262) --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 031cfc4744c01..fc93af80f0bff 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10443,6 +10443,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x000c, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. From 2739775926ef7efcd12e6300dbb8f28d9731714f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 30 Dec 2024 12:05:35 +0100 Subject: [PATCH 190/216] ALSA: seq: oss: Fix races at processing SysEx messages commit 0179488ca992d79908b8e26b9213f1554fc5bacc upstream. OSS sequencer handles the SysEx messages split in 6 bytes packets, and ALSA sequencer OSS layer tries to combine those. It stores the data in the internal buffer and this access is racy as of now, which may lead to the out-of-bounds access. As a temporary band-aid fix, introduce a mutex for serializing the process of the SysEx message packets. 
Reported-by: Kun Hu Closes: https://lore.kernel.org/2B7E93E4-B13A-4AE4-8E87-306A8EE9BBB7@m.fudan.edu.cn Cc: Link: https://patch.msgid.link/20241230110543.32454-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d2392b79d8af3714ea8878b71c66dc49d3110f44) --- sound/core/seq/oss/seq_oss_synth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index e3394919daa09..51ee4c00a8431 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -66,6 +66,7 @@ static struct seq_oss_synth midi_synth_dev = { }; static DEFINE_SPINLOCK(register_lock); +static DEFINE_MUTEX(sysex_mutex); /* * prototypes @@ -497,6 +498,7 @@ snd_seq_oss_synth_sysex(struct seq_oss_devinfo *dp, int dev, unsigned char *buf, if (!info) return -ENXIO; + guard(mutex)(&sysex_mutex); sysex = info->sysex; if (sysex == NULL) { sysex = kzalloc(sizeof(*sysex), GFP_KERNEL); From 6957fe6cf270db172d0c55ba29c61aa44489163a Mon Sep 17 00:00:00 2001 From: Dennis Lam Date: Tue, 17 Dec 2024 21:39:25 -0500 Subject: [PATCH 191/216] ocfs2: fix slab-use-after-free due to dangling pointer dqi_priv commit 5f3fd772d152229d94602bca243fbb658068a597 upstream. When mounting ocfs2 and then remounting it as read-only, a slab-use-after-free occurs after the user uses a syscall to quota_getnextquota. Specifically, sb_dqinfo(sb, type)->dqi_priv is the dangling pointer. During the remounting process, the pointer dqi_priv is freed but is never set as null leaving it to be accessed. Additionally, the read-only option for remounting sets the DQUOT_SUSPENDED flag instead of setting the DQUOT_USAGE_ENABLED flags. Moreover, later in the process of getting the next quota, the function ocfs2_get_next_id is called and only checks the quota usage flags and not the quota suspended flags. 
To fix this, I set dqi_priv to null when it is freed after remounting with read-only and put a check for DQUOT_SUSPENDED in ocfs2_get_next_id. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20241218023924.22821-2-dennis.lamerice@gmail.com Fixes: 8f9e8f5fcc05 ("ocfs2: Fix Q_GETNEXTQUOTA for filesystem without quotas") Signed-off-by: Dennis Lam Reported-by: syzbot+d173bf8a5a7faeede34c@syzkaller.appspotmail.com Tested-by: syzbot+d173bf8a5a7faeede34c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6731d26f.050a0220.1fb99c.014b.GAE@google.com/T/ Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2e3d203b1adede46bbba049e497765d67865be18) --- fs/ocfs2/quota_global.c | 2 +- fs/ocfs2/quota_local.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index dc9f76ab7e13c..0dffd6a44d39d 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -881,7 +881,7 @@ static int ocfs2_get_next_id(struct super_block *sb, struct kqid *qid) int status = 0; trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type); - if (!sb_has_quota_loaded(sb, type)) { + if (!sb_has_quota_active(sb, type)) { status = -ESRCH; goto out; } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 257f13cdd14c1..4b4fa58cd32ff 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -864,6 +864,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) brelse(oinfo->dqi_libh); brelse(oinfo->dqi_lqi_bh); kfree(oinfo); + info->dqi_priv = NULL; return status; } From ecd1de387a2d23e3c8a7bb6799eb311b41467fbf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Dec 2024 08:18:10 +0100 Subject: [PATCH 192/216] kcov: mark in_softirq_really() as __always_inline commit cb0ca08b326aa03f87fe94bb91872ce8d2ef1ed8 
upstream. If gcc decides not to inline in_softirq_really(), objtool warns about a function call with UACCESS enabled: kernel/kcov.o: warning: objtool: __sanitizer_cov_trace_pc+0x1e: call to in_softirq_really() with UACCESS enabled kernel/kcov.o: warning: objtool: check_kcov_mode+0x11: call to in_softirq_really() with UACCESS enabled Mark this as __always_inline to avoid the problem. Link: https://lkml.kernel.org/r/20241217071814.2261620-1-arnd@kernel.org Fixes: 7d4df2dad312 ("kcov: properly check for softirq context") Signed-off-by: Arnd Bergmann Reviewed-by: Marco Elver Cc: Aleksandr Nogikh Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit edc8ece96c11c89de82e400488b44772b0efb157) --- kernel/kcov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 72d9aa6fb50c3..097c8afa67557 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -165,7 +165,7 @@ static void kcov_remote_area_put(struct kcov_remote_area *area, * Unlike in_serving_softirq(), this function returns false when called during * a hardirq or an NMI that happened in the softirq context. */ -static inline bool in_softirq_really(void) +static __always_inline bool in_softirq_really(void) { return in_serving_softirq() && !in_hardirq() && !in_nmi(); } From b8f60a81942870172e2314ffca02f34d3bf960d6 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Thu, 26 Dec 2024 22:03:32 +0800 Subject: [PATCH 193/216] scripts/sorttable: fix orc_sort_cmp() to maintain symmetry and transitivity commit 0210d251162f4033350a94a43f95b1c39ec84a90 upstream. The orc_sort_cmp() function, used with qsort(), previously violated the symmetry and transitivity rules required by the C standard. Specifically, when both entries are ORC_TYPE_UNDEFINED, it could result in both a < b and b < a, which breaks the required symmetry and transitivity. 
This can lead to undefined behavior and incorrect sorting results, potentially causing memory corruption in glibc implementations [1]. Symmetry: If x < y, then y > x. Transitivity: If x < y and y < z, then x < z. Fix the comparison logic to return 0 when both entries are ORC_TYPE_UNDEFINED, ensuring compliance with qsort() requirements. Link: https://www.qualys.com/2024/01/30/qsort.txt [1] Link: https://lkml.kernel.org/r/20241226140332.2670689-1-visitorckw@gmail.com Fixes: 57fa18994285 ("scripts/sorttable: Implement build-time ORC unwind table sorting") Fixes: fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in two") Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Cc: Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Shile Zhang Cc: Steven Rostedt Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c9818b61d0a859ecec77ec3253b418dc89e5e695) --- scripts/sorttable.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/sorttable.h b/scripts/sorttable.h index 7bd0184380d3b..a7c5445baf002 100644 --- a/scripts/sorttable.h +++ b/scripts/sorttable.h @@ -110,7 +110,7 @@ static inline unsigned long orc_ip(const int *ip) static int orc_sort_cmp(const void *_a, const void *_b) { - struct orc_entry *orc_a; + struct orc_entry *orc_a, *orc_b; const int *a = g_orc_ip_table + *(int *)_a; const int *b = g_orc_ip_table + *(int *)_b; unsigned long a_val = orc_ip(a); @@ -128,6 +128,9 @@ static int orc_sort_cmp(const void *_a, const void *_b) * whitelisted .o files which didn't get objtool generation. */ orc_a = g_orc_table + (a - g_orc_ip_table); + orc_b = g_orc_table + (b - g_orc_ip_table); + if (orc_a->type == ORC_TYPE_UNDEFINED && orc_b->type == ORC_TYPE_UNDEFINED) + return 0; return orc_a->type == ORC_TYPE_UNDEFINED ? 
-1 : 1; } From d88d619b3e213e2cc5aa22865e1a8943e4b1a265 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Nov 2024 13:06:41 +0300 Subject: [PATCH 194/216] RDMA/uverbs: Prevent integer overflow issue commit d0257e089d1bbd35c69b6c97ff73e3690ab149a9 upstream. In the expression "cmd.wqe_size * cmd.wr_count", both variables are u32 values that come from the user so the multiplication can lead to integer wrapping. Then we pass the result to uverbs_request_next_ptr() which also could potentially wrap. The "cmd.sge_count * sizeof(struct ib_uverbs_sge)" multiplication can also overflow on 32bit systems although it's fine on 64bit systems. This patch does two things. First, I've re-arranged the condition in uverbs_request_next_ptr() so that the use controlled variable "len" is on one side of the comparison by itself without any math. Then I've modified all the callers to use size_mul() for the multiplications. Fixes: 67cdb40ca444 ("[IB] uverbs: Implement more commands") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/b8765ab3-c2da-4611-aae0-ddd6ba173d23@stanley.mountain Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b92667f755749cf10d9ef1088865c555ae83ffb7) --- drivers/infiniband/core/uverbs_cmd.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e836c9c477f67..c6053e82ecf6f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -161,7 +161,7 @@ static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter, { const void __user *res = iter->cur; - if (iter->cur + len > iter->end) + if (len > iter->end - iter->cur) return (void __force __user *)ERR_PTR(-ENOSPC); iter->cur += len; return res; @@ -2009,11 +2009,13 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) ret = uverbs_request_start(attrs, 
&iter, &cmd, sizeof(cmd)); if (ret) return ret; - wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count); + wqes = uverbs_request_next_ptr(&iter, size_mul(cmd.wqe_size, + cmd.wr_count)); if (IS_ERR(wqes)) return PTR_ERR(wqes); - sgls = uverbs_request_next_ptr( - &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge)); + sgls = uverbs_request_next_ptr(&iter, + size_mul(cmd.sge_count, + sizeof(struct ib_uverbs_sge))); if (IS_ERR(sgls)) return PTR_ERR(sgls); ret = uverbs_request_finish(&iter); @@ -2199,11 +2201,11 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count, if (wqe_size < sizeof(struct ib_uverbs_recv_wr)) return ERR_PTR(-EINVAL); - wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count); + wqes = uverbs_request_next_ptr(iter, size_mul(wqe_size, wr_count)); if (IS_ERR(wqes)) return ERR_CAST(wqes); - sgls = uverbs_request_next_ptr( - iter, sge_count * sizeof(struct ib_uverbs_sge)); + sgls = uverbs_request_next_ptr(iter, size_mul(sge_count, + sizeof(struct ib_uverbs_sge))); if (IS_ERR(sgls)) return ERR_CAST(sgls); ret = uverbs_request_finish(iter); From e3e2a0c36a6c1c23bbf0befbb5adb841c3804486 Mon Sep 17 00:00:00 2001 From: Evgenii Shatokhin Date: Mon, 9 Dec 2024 10:46:59 +0300 Subject: [PATCH 195/216] pinctrl: mcp23s08: Fix sleeping in atomic context due to regmap locking commit a37eecb705f33726f1fb7cd2a67e514a15dfe693 upstream. If a device uses MCP23xxx IO expander to receive IRQs, the following bug can happen: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, ... preempt_count: 1, expected: 0 ... Call Trace: ... __might_resched+0x104/0x10e __might_sleep+0x3e/0x62 mutex_lock+0x20/0x4c regmap_lock_mutex+0x10/0x18 regmap_update_bits_base+0x2c/0x66 mcp23s08_irq_set_type+0x1ae/0x1d6 __irq_set_trigger+0x56/0x172 __setup_irq+0x1e6/0x646 request_threaded_irq+0xb6/0x160 ... 
We observed the problem while experimenting with a touchscreen driver which used MCP23017 IO expander (I2C). The regmap in the pinctrl-mcp23s08 driver uses a mutex for protection from concurrent accesses, which is the default for regmaps without .fast_io, .disable_locking, etc. mcp23s08_irq_set_type() calls regmap_update_bits_base(), and the latter locks the mutex. However, __setup_irq() locks desc->lock spinlock before calling these functions. As a result, the system tries to lock the mutex while holding the spinlock. It seems, the internal regmap locks are not needed in this driver at all. mcp->lock seems to protect the regmap from concurrent accesses already, except, probably, in mcp_pinconf_get/set. mcp23s08_irq_set_type() and mcp23s08_irq_mask/unmask() are called under chip_bus_lock(), which calls mcp23s08_irq_bus_lock(). The latter takes mcp->lock and enables regmap caching, so that the potentially slow I2C accesses are deferred until chip_bus_unlock(). The accesses to the regmap from mcp23s08_probe_one() do not need additional locking. In all remaining places where the regmap is accessed, except mcp_pinconf_get/set(), the driver already takes mcp->lock. This patch adds locking in mcp_pinconf_get/set() and disables internal locking in the regmap config. Among other things, it fixes the sleeping in atomic context described above. 
Fixes: 8f38910ba4f6 ("pinctrl: mcp23s08: switch to regmap caching") Cc: stable@vger.kernel.org Signed-off-by: Evgenii Shatokhin Link: https://lore.kernel.org/20241209074659.1442898-1-e.shatokhin@yadro.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 8c6fd5803b988a5e78c9b9e42c70a936d7cfc6ec) --- drivers/pinctrl/pinctrl-mcp23s08.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 4551575e4e7d7..fd97b6ee2a8d1 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -86,6 +86,7 @@ const struct regmap_config mcp23x08_regmap = { .num_reg_defaults = ARRAY_SIZE(mcp23x08_defaults), .cache_type = REGCACHE_FLAT, .max_register = MCP_OLAT, + .disable_locking = true, /* mcp->lock protects the regmap */ }; EXPORT_SYMBOL_GPL(mcp23x08_regmap); @@ -132,6 +133,7 @@ const struct regmap_config mcp23x17_regmap = { .num_reg_defaults = ARRAY_SIZE(mcp23x17_defaults), .cache_type = REGCACHE_FLAT, .val_format_endian = REGMAP_ENDIAN_LITTLE, + .disable_locking = true, /* mcp->lock protects the regmap */ }; EXPORT_SYMBOL_GPL(mcp23x17_regmap); @@ -228,7 +230,9 @@ static int mcp_pinconf_get(struct pinctrl_dev *pctldev, unsigned int pin, switch (param) { case PIN_CONFIG_BIAS_PULL_UP: + mutex_lock(&mcp->lock); ret = mcp_read(mcp, MCP_GPPU, &data); + mutex_unlock(&mcp->lock); if (ret < 0) return ret; status = (data & BIT(pin)) ? 
1 : 0; @@ -257,7 +261,9 @@ static int mcp_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, switch (param) { case PIN_CONFIG_BIAS_PULL_UP: + mutex_lock(&mcp->lock); ret = mcp_set_bit(mcp, MCP_GPPU, pin, arg); + mutex_unlock(&mcp->lock); break; default: dev_dbg(mcp->dev, "Invalid config param %04x\n", param); From 4b01aed62a51e0777c1158009217120994eccf97 Mon Sep 17 00:00:00 2001 From: Pascal Hambourg Date: Mon, 23 Dec 2024 17:44:01 +0100 Subject: [PATCH 196/216] sky2: Add device ID 11ab:4373 for Marvell 88E8075 commit 03c8d0af2e409e15c16130b185e12b5efba0a6b9 upstream. A Marvell 88E8075 ethernet controller has this device ID instead of 11ab:4370 and works fine with the sky2 driver. Signed-off-by: Pascal Hambourg Cc: stable@vger.kernel.org Link: https://patch.msgid.link/10165a62-99fb-4be6-8c64-84afd6234085@plouf.fr.eu.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b32c3b748d29eb1fcc9463f903b3540dccd5c887) --- drivers/net/ethernet/marvell/sky2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 07720841a8d70..dd3d93720358b 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -129,6 +129,7 @@ static const struct pci_device_id sky2_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x436C) }, /* 88E8072 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x436D) }, /* 88E8055 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4370) }, /* 88E8075 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4373) }, /* 88E8075 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4380) }, /* 88E8057 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4381) }, /* 88E8059 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0x4382) }, /* 88E8079 */ From 22d0ca8fd614f428d35d4536b3239f4c5694961b Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Thu, 19 Dec 2024 19:21:14 +0300 Subject: [PATCH 197/216] net/sctp: Prevent autoclose integer overflow in sctp_association_init() 
commit 4e86729d1ff329815a6e8a920cb554a1d4cb5b8d upstream. While by default max_autoclose equals to INT_MAX / HZ, one may set net.sctp.max_autoclose to UINT_MAX. There is code in sctp_association_init() that can consequently trigger overflow. Cc: stable@vger.kernel.org Fixes: 9f70f46bd4c7 ("sctp: properly latch and use autoclose value from sock to association") Signed-off-by: Nikolay Kuratov Acked-by: Xin Long Link: https://patch.msgid.link/20241219162114.2863827-1-kniv@yandex-team.ru Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 271f031f4c31c07e2a85a1ba2b4c8e734909a477) --- net/sctp/associola.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index c45c192b78787..0b0794f164cf2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -137,7 +137,8 @@ static struct sctp_association *sctp_association_init( = 5 * asoc->rto_max; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = + (unsigned long)sp->autoclose * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) From d4f7ee72b1657145fb32b5db59b23ea3bf6b2bd0 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 19 Nov 2024 19:20:31 +0000 Subject: [PATCH 198/216] drm: adv7511: Drop dsi single lane support commit 79d67c499c3f886202a40c5cb27e747e4fa4d738 upstream. As per [1] and [2], ADV7535/7533 supports only 2-, 3-, or 4-lane. Drop unsupported 1-lane. 
[1] https://www.analog.com/media/en/technical-documentation/data-sheets/ADV7535.pdf [2] https://www.analog.com/media/en/technical-documentation/data-sheets/ADV7533.pdf Fixes: 1e4d58cd7f88 ("drm/bridge: adv7533: Create a MIPI DSI device") Reported-by: Hien Huynh Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Reviewed-by: Adam Ford Signed-off-by: Biju Das Link: https://patchwork.freedesktop.org/patch/msgid/20241119192040.152657-4-biju.das.jz@bp.renesas.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 7b977f8c26b65c7788da19a150434171c1df953d) --- drivers/gpu/drm/bridge/adv7511/adv7533.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c index 7e3e56441aedc..3a79297ca9804 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7533.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c @@ -175,7 +175,7 @@ int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv) of_property_read_u32(np, "adi,dsi-lanes", &num_lanes); - if (num_lanes < 1 || num_lanes > 4) + if (num_lanes < 2 || num_lanes > 4) return -EINVAL; adv->num_dsi_lanes = num_lanes; From 0ef492a909505208e71ebee78c14e6345b395de1 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 19 Nov 2024 19:20:30 +0000 Subject: [PATCH 199/216] dt-bindings: display: adi,adv7533: Drop single lane support commit ee8f9ed57a397605434caeef351bafa3ec4dfdd4 upstream. As per [1] and [2], ADV7535/7533 supports only 2-, 3-, or 4-lane. Drop unsupported 1-lane from bindings. 
[1] https://www.analog.com/media/en/technical-documentation/data-sheets/ADV7535.pdf [2] https://www.analog.com/media/en/technical-documentation/data-sheets/ADV7533.pdf Fixes: 1e4d58cd7f88 ("drm/bridge: adv7533: Create a MIPI DSI device") Cc: stable@vger.kernel.org Acked-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Biju Das Link: https://patchwork.freedesktop.org/patch/msgid/20241119192040.152657-3-biju.das.jz@bp.renesas.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 79fcfc900abe967330f873e5dd702d3b43618be4) --- .../devicetree/bindings/display/bridge/adi,adv7533.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml index 987aa83c26494..e956f524e379d 100644 --- a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml +++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml @@ -87,7 +87,7 @@ properties: adi,dsi-lanes: description: Number of DSI data lanes connected to the DSI host. $ref: /schemas/types.yaml#/definitions/uint32 - enum: [ 1, 2, 3, 4 ] + enum: [ 2, 3, 4 ] ports: description: From 6c28d11996d96881ea11c5790754fcf6e0b45526 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 19 Nov 2024 19:20:29 +0000 Subject: [PATCH 200/216] drm: adv7511: Fix use-after-free in adv7533_attach_dsi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 81adbd3ff21c1182e06aa02c6be0bfd9ea02d8e8 upstream. The host_node pointer was assigned and freed in adv7533_parse_dt(), and later, adv7533_attach_dsi() uses the same. Fix this use-after-free issue by dropping of_node_put() in adv7533_parse_dt() and calling of_node_put() in error path of probe() and also in the remove(). 
Fixes: 1e4d58cd7f88 ("drm/bridge: adv7533: Create a MIPI DSI device") Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Signed-off-by: Biju Das Link: https://patchwork.freedesktop.org/patch/msgid/20241119192040.152657-2-biju.das.jz@bp.renesas.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1f49aaf55652580ae63ab83d67211fe6a55d83dc) --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 10 ++++++++-- drivers/gpu/drm/bridge/adv7511/adv7533.c | 2 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index ef2b6ce544d0a..1aa4153b40e0c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1225,8 +1225,10 @@ static int adv7511_probe(struct i2c_client *i2c) return ret; ret = adv7511_init_regulators(adv7511); - if (ret) - return dev_err_probe(dev, ret, "failed to init regulators\n"); + if (ret) { + dev_err_probe(dev, ret, "failed to init regulators\n"); + goto err_of_node_put; + } /* * The power down GPIO is optional. 
If present, toggle it from active to @@ -1346,6 +1348,8 @@ static int adv7511_probe(struct i2c_client *i2c) i2c_unregister_device(adv7511->i2c_edid); uninit_regulators: adv7511_uninit_regulators(adv7511); +err_of_node_put: + of_node_put(adv7511->host_node); return ret; } @@ -1354,6 +1358,8 @@ static void adv7511_remove(struct i2c_client *i2c) { struct adv7511 *adv7511 = i2c_get_clientdata(i2c); + of_node_put(adv7511->host_node); + adv7511_uninit_regulators(adv7511); drm_bridge_remove(&adv7511->bridge); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c index 3a79297ca9804..6a4733c708270 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7533.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c @@ -184,8 +184,6 @@ int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv) if (!adv->host_node) return -ENODEV; - of_node_put(adv->host_node); - adv->use_timing_gen = !of_property_read_bool(np, "adi,disable-timing-generator"); From ea7b9f5ebfd6e6a0f1dd8a205d730410b2805ef0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 17 Dec 2024 20:50:00 +0100 Subject: [PATCH 201/216] fs/proc/task_mmu: fix pagemap flags with PMD THP entries on 32bit commit 3754137d263f52f4b507cf9ae913f8f0497d1b0e upstream. Entries (including flags) are u64, even on 32bit. So right now we are cutting of the flags on 32bit. This way, for example the cow selftest complains about: # ./cow ... Bail Out! 
read and ioctl return unmatched results for populated: 0 1 Link: https://lkml.kernel.org/r/20241217195000.1734039-1-david@redhat.com Fixes: 2c1f057e5be6 ("fs/proc/task_mmu: properly detect PM_MMAP_EXCLUSIVE per page of PMD-mapped THPs") Signed-off-by: David Hildenbrand Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9b071576f891ee5be970df75499d9d5892815f8a) --- fs/proc/task_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 59571737e1677..b8640f36ebf8a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1516,7 +1516,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, flags |= PM_FILE; for (; addr != end; addr += PAGE_SIZE, idx++) { - unsigned long cur_flags = flags; + u64 cur_flags = flags; pagemap_entry_t pme; if (page && (flags & PM_PRESENT) && From 39e87de98e7e0cbe7576681a2b74f68e5f8d4475 Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Wed, 18 Dec 2024 05:34:13 -0800 Subject: [PATCH 202/216] gve: guard XSK operations on the existence of queues commit 40338d7987d810fcaa95c500b1068a52b08eec9b upstream. This patch predicates the enabling and disabling of XSK pools on the existence of queues. As it stands, if the interface is down, disabling or enabling XSK pools would result in a crash, as the RX queue pointer would be NULL. XSK pool registration will occur as part of the next interface up. Similarly, xsk_wakeup needs be guarded against queues disappearing while the function is executing, so a check against the GVE_PRIV_FLAGS_NAPI_ENABLED flag is added to synchronize with the disabling of the bit and the synchronize_net() in gve_turndown. 
Fixes: fd8e40321a12 ("gve: Add AF_XDP zero-copy support for GQI-QPL format") Cc: stable@vger.kernel.org Signed-off-by: Joshua Washington Signed-off-by: Praveen Kaligineedi Reviewed-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Reviewed-by: Larysa Zaremba Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 771d66f2bd8c4dba1286a9163ab982cecd825718) --- drivers/net/ethernet/google/gve/gve_main.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 5703240474e5b..5a763149b06a5 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1528,8 +1528,8 @@ static int gve_xsk_pool_enable(struct net_device *dev, if (err) return err; - /* If XDP prog is not installed, return */ - if (!priv->xdp_prog) + /* If XDP prog is not installed or interface is down, return. */ + if (!priv->xdp_prog || !netif_running(dev)) return 0; rx = &priv->rx[qid]; @@ -1574,21 +1574,16 @@ static int gve_xsk_pool_disable(struct net_device *dev, if (qid >= priv->rx_cfg.num_queues) return -EINVAL; - /* If XDP prog is not installed, unmap DMA and return */ - if (!priv->xdp_prog) + /* If XDP prog is not installed or interface is down, unmap DMA and + * return. 
+ */ + if (!priv->xdp_prog || !netif_running(dev)) goto done; - tx_qid = gve_xdp_tx_queue_id(priv, qid); - if (!netif_running(dev)) { - priv->rx[qid].xsk_pool = NULL; - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); - priv->tx[tx_qid].xsk_pool = NULL; - goto done; - } - napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; napi_disable(napi_rx); /* make sure current rx poll is done */ + tx_qid = gve_xdp_tx_queue_id(priv, qid); napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; napi_disable(napi_tx); /* make sure current tx poll is done */ @@ -1616,6 +1611,9 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) struct gve_priv *priv = netdev_priv(dev); int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; + if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) return -EINVAL; From f43f5586df094b4826d39e9ba81db643a3d5973a Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Wed, 18 Dec 2024 05:34:12 -0800 Subject: [PATCH 203/216] gve: guard XDP xmit NDO on existence of xdp queues commit ff7c2dea9dd1a436fc79d6273adffdcc4a7ffea3 upstream. In GVE, dedicated XDP queues only exist when an XDP program is installed and the interface is up. As such, the NDO XDP XMIT callback should return early if either of these conditions are false. In the case of no loaded XDP program, priv->num_xdp_queues=0 which can cause a divide-by-zero error, and in the case of interface down, num_xdp_queues remains untouched to persist XDP queue count for the next interface up, but the TX pointer itself would be NULL. The XDP xmit callback also needs to synchronize with a device transitioning from open to close. This synchronization will happen via the GVE_PRIV_FLAGS_NAPI_ENABLED bit along with a synchronize_net() call, which waits for any RCU critical sections at call-time to complete. 
Fixes: 39a7f4aa3e4a ("gve: Add XDP REDIRECT support for GQI-QPL format") Cc: stable@vger.kernel.org Signed-off-by: Joshua Washington Signed-off-by: Praveen Kaligineedi Reviewed-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman (cherry picked from commit cbe9eb2c39d09f3c8574febcfa39d8c09d0c7cb5) --- drivers/net/ethernet/google/gve/gve_main.c | 3 +++ drivers/net/ethernet/google/gve/gve_tx.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 5a763149b06a5..d70305654e7d0 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1755,6 +1755,9 @@ static void gve_turndown(struct gve_priv *priv) gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); + + /* Make sure that all traffic is finished processing. */ + synchronize_net(); } static void gve_turnup(struct gve_priv *priv) diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 2ae891a62875c..29987624791a6 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -777,9 +777,12 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct gve_tx_ring *tx; int i, err = 0, qid; - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK) || !priv->xdp_prog) return -EINVAL; + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; + qid = gve_xdp_tx_queue_id(priv, smp_processor_id() % priv->num_xdp_queues); From c632b03fe0e36b6f0940590c65df2e8d4613775e Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 6 Dec 2024 16:30:25 +0800 Subject: [PATCH 204/216] mm/readahead: fix large folio support in async readahead commit 158cdce87c8c172787063998ad5dd3e2f658b963 upstream. 
When testing large folio support with XFS on our servers, we observed that only a few large folios are mapped when reading large files via mmap. After a thorough analysis, I identified it was caused by the `/sys/block/*/queue/read_ahead_kb` setting. On our test servers, this parameter is set to 128KB. After I tuned it to 2MB, large folios worked as expected. However, I believe the large folio behavior should not be dependent on the value of read_ahead_kb. It would be more robust if the kernel could automatically adapt to it. With /sys/block/*/queue/read_ahead_kb set to 128KB and performing a sequential read on a 1GB file using MADV_HUGEPAGE, the differences in /proc/meminfo are as follows: - before this patch FileHugePages: 18432 kB FilePmdMapped: 4096 kB - after this patch FileHugePages: 1067008 kB FilePmdMapped: 1048576 kB This shows that after applying the patch, the entire 1GB file is mapped to huge pages. The stable list is CCed, as without this patch, large folios don't function optimally in the readahead path. It's worth noting that if read_ahead_kb is set to a larger value that isn't aligned with huge page sizes (e.g., 4MB + 128KB), it may still fail to map to hugepages.
Link: https://lkml.kernel.org/r/20241108141710.9721-1-laoar.shao@gmail.com Link: https://lkml.kernel.org/r/20241206083025.3478-1-laoar.shao@gmail.com Fixes: 4687fdbb805a ("mm/filemap: Support VM_HUGEPAGE for file mappings") Signed-off-by: Yafang Shao Tested-by: kernel test robot Cc: Matthew Wilcox Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 424abdec35ec4d6cc7f34f3f9fe60a9f37ff0e33) --- mm/readahead.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/readahead.c b/mm/readahead.c index e9b11d928b0c4..f1595c032ce7e 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -580,7 +580,11 @@ static void ondemand_readahead(struct readahead_control *ractl, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; - ra->size = get_next_ra_size(ra, max_pages); + /* + * In the case of MADV_HUGEPAGE, the actual size might exceed + * the readahead window. + */ + ra->size = max(ra->size, get_next_ra_size(ra, max_pages)); ra->async_size = ra->size; goto readit; } From 495e52df469ee7b97d51498f56b13b44d05b5247 Mon Sep 17 00:00:00 2001 From: Alessandro Carminati Date: Tue, 17 Dec 2024 14:20:33 +0000 Subject: [PATCH 205/216] mm/kmemleak: fix sleeping function called from invalid context at print message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cddc76b165161a02ff14c4d84d0f5266d9d32b9e upstream. 
Address a bug in the kernel that triggers a "sleeping function called from invalid context" warning when /sys/kernel/debug/kmemleak is printed under specific conditions: - CONFIG_PREEMPT_RT=y - Set SELinux as the LSM for the system - Set kptr_restrict to 1 - kmemleak buffer contains at least one item BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 136, name: cat preempt_count: 1, expected: 0 RCU nest depth: 2, expected: 2 6 locks held by cat/136: #0: ffff32e64bcbf950 (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0xb8/0xe30 #1: ffffafe6aaa9dea0 (scan_mutex){+.+.}-{3:3}, at: kmemleak_seq_start+0x34/0x128 #3: ffff32e6546b1cd0 (&object->lock){....}-{2:2}, at: kmemleak_seq_show+0x3c/0x1e0 #4: ffffafe6aa8d8560 (rcu_read_lock){....}-{1:2}, at: has_ns_capability_noaudit+0x8/0x1b0 #5: ffffafe6aabbc0f8 (notif_lock){+.+.}-{2:2}, at: avc_compute_av+0xc4/0x3d0 irq event stamp: 136660 hardirqs last enabled at (136659): [] _raw_spin_unlock_irqrestore+0xa8/0xd8 hardirqs last disabled at (136660): [] _raw_spin_lock_irqsave+0x8c/0xb0 softirqs last enabled at (0): [] copy_process+0x11d8/0x3df8 softirqs last disabled at (0): [<0000000000000000>] 0x0 Preemption disabled at: [] kmemleak_seq_show+0x3c/0x1e0 CPU: 1 UID: 0 PID: 136 Comm: cat Tainted: G E 6.11.0-rt7+ #34 Tainted: [E]=UNSIGNED_MODULE Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0xa0/0x128 show_stack+0x1c/0x30 dump_stack_lvl+0xe8/0x198 dump_stack+0x18/0x20 rt_spin_lock+0x8c/0x1a8 avc_perm_nonode+0xa0/0x150 cred_has_capability.isra.0+0x118/0x218 selinux_capable+0x50/0x80 security_capable+0x7c/0xd0 has_ns_capability_noaudit+0x94/0x1b0 has_capability_noaudit+0x20/0x30 restricted_pointer+0x21c/0x4b0 pointer+0x298/0x760 vsnprintf+0x330/0xf70 seq_printf+0x178/0x218 print_unreferenced+0x1a4/0x2d0 kmemleak_seq_show+0xd0/0x1e0 seq_read_iter+0x354/0xe30 seq_read+0x250/0x378 full_proxy_read+0xd8/0x148 vfs_read+0x190/0x918 
ksys_read+0xf0/0x1e0 __arm64_sys_read+0x70/0xa8 invoke_syscall.constprop.0+0xd4/0x1d8 el0_svc+0x50/0x158 el0t_64_sync+0x17c/0x180 %pS and %pK, in the same back trace line, are redundant, and %pS can void %pK service in certain contexts. %pS alone already provides the necessary information, and if it cannot resolve the symbol, it falls back to printing the raw address voiding the original intent behind the %pK. Additionally, %pK requires a privilege check CAP_SYSLOG enforced through the LSM, which can trigger a "sleeping function called from invalid context" warning under RT_PREEMPT kernels when the check occurs in an atomic context. This issue may also affect other LSMs. This change avoids the unnecessary privilege check and resolves the sleeping function warning without any loss of information. Link: https://lkml.kernel.org/r/20241217142032.55793-1-acarmina@redhat.com Fixes: 3a6f33d86baa ("mm/kmemleak: use %pK to display kernel pointers in backtrace") Signed-off-by: Alessandro Carminati Acked-by: Sebastian Andrzej Siewior Acked-by: Catalin Marinas Cc: Clément Léger Cc: Alessandro Carminati Cc: Eric Chanudet Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Steven Rostedt Cc: Thomas Weißschuh Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 86d946f3f9992aaa12abcfd09f925446c2cd42a2) --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 54c2c90d3abc9..5811a11cc53a6 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -368,7 +368,7 @@ static void print_unreferenced(struct seq_file *seq, for (i = 0; i < nr_entries; i++) { void *ptr = (void *)entries[i]; - warn_or_seq_printf(seq, " [<%pK>] %pS\n", ptr, ptr); + warn_or_seq_printf(seq, " %pS\n", ptr); } } From 5bf1041e8c6240d477ff5c07df82c2683fb7e3d5 Mon Sep 17 00:00:00 2001 From: Seiji Nishikawa Date: Sun, 1 Dec 2024 01:12:34 +0900 Subject: [PATCH 206/216] mm: vmscan: account for free pages to prevent infinite Loop in 
throttle_direct_reclaim() commit 6aaced5abd32e2a57cd94fd64f824514d0361da8 upstream. The task sometimes continues looping in throttle_direct_reclaim() because allow_direct_reclaim(pgdat) keeps returning false. #0 [ffff80002cb6f8d0] __switch_to at ffff8000080095ac #1 [ffff80002cb6f900] __schedule at ffff800008abbd1c #2 [ffff80002cb6f990] schedule at ffff800008abc50c #3 [ffff80002cb6f9b0] throttle_direct_reclaim at ffff800008273550 #4 [ffff80002cb6fa20] try_to_free_pages at ffff800008277b68 #5 [ffff80002cb6fae0] __alloc_pages_nodemask at ffff8000082c4660 #6 [ffff80002cb6fc50] alloc_pages_vma at ffff8000082e4a98 #7 [ffff80002cb6fca0] do_anonymous_page at ffff80000829f5a8 #8 [ffff80002cb6fce0] __handle_mm_fault at ffff8000082a5974 #9 [ffff80002cb6fd90] handle_mm_fault at ffff8000082a5bd4 At this point, the pgdat contains the following two zones: NODE: 4 ZONE: 0 ADDR: ffff00817fffe540 NAME: "DMA32" SIZE: 20480 MIN/LOW/HIGH: 11/28/45 VM_STAT: NR_FREE_PAGES: 359 NR_ZONE_INACTIVE_ANON: 18813 NR_ZONE_ACTIVE_ANON: 0 NR_ZONE_INACTIVE_FILE: 50 NR_ZONE_ACTIVE_FILE: 0 NR_ZONE_UNEVICTABLE: 0 NR_ZONE_WRITE_PENDING: 0 NR_MLOCK: 0 NR_BOUNCE: 0 NR_ZSPAGES: 0 NR_FREE_CMA_PAGES: 0 NODE: 4 ZONE: 1 ADDR: ffff00817fffec00 NAME: "Normal" SIZE: 8454144 PRESENT: 98304 MIN/LOW/HIGH: 68/166/264 VM_STAT: NR_FREE_PAGES: 146 NR_ZONE_INACTIVE_ANON: 94668 NR_ZONE_ACTIVE_ANON: 3 NR_ZONE_INACTIVE_FILE: 735 NR_ZONE_ACTIVE_FILE: 78 NR_ZONE_UNEVICTABLE: 0 NR_ZONE_WRITE_PENDING: 0 NR_MLOCK: 0 NR_BOUNCE: 0 NR_ZSPAGES: 0 NR_FREE_CMA_PAGES: 0 In allow_direct_reclaim(), while processing ZONE_DMA32, the sum of inactive/active file-backed pages calculated in zone_reclaimable_pages() based on the result of zone_page_state_snapshot() is zero. Additionally, since this system lacks swap, the calculation of inactive/ active anonymous pages is skipped. 
crash> p nr_swap_pages nr_swap_pages = $1937 = { counter = 0 } As a result, ZONE_DMA32 is deemed unreclaimable and skipped, moving on to the processing of the next zone, ZONE_NORMAL, despite ZONE_DMA32 having free pages significantly exceeding the high watermark. The problem is that the pgdat->kswapd_failures hasn't been incremented. crash> px ((struct pglist_data *) 0xffff00817fffe540)->kswapd_failures $1935 = 0x0 This is because the node is deemed balanced. The node balancing logic in balance_pgdat() evaluates all zones collectively. If one or more zones (e.g., ZONE_DMA32) have enough free pages to meet their watermarks, the entire node is deemed balanced. This causes balance_pgdat() to exit early before incrementing the kswapd_failures, as it considers the overall memory state acceptable, even though some zones (like ZONE_NORMAL) remain under significant pressure. The patch ensures that zone_reclaimable_pages() includes free pages (NR_FREE_PAGES) in its calculation when no other reclaimable pages are available (e.g., file-backed or anonymous pages). This change prevents zones like ZONE_DMA32, which have sufficient free pages, from being mistakenly deemed unreclaimable. By doing so, the patch ensures proper node balancing, avoids masking pressure on other zones like ZONE_NORMAL, and prevents infinite loops in throttle_direct_reclaim() caused by allow_direct_reclaim(pgdat) repeatedly returning false. The kernel hangs due to a task stuck in throttle_direct_reclaim(), caused by a node being incorrectly deemed balanced despite pressure in certain zones, such as ZONE_NORMAL. This issue arises from zone_reclaimable_pages() returning 0 for zones without reclaimable file-backed or anonymous pages, causing zones like ZONE_DMA32 with sufficient free pages to be skipped. The lack of swap or reclaimable pages results in ZONE_DMA32 being ignored during reclaim, masking pressure in other zones.
Consequently, pgdat->kswapd_failures remains 0 in balance_pgdat(), preventing fallback mechanisms in allow_direct_reclaim() from being triggered, leading to an infinite loop in throttle_direct_reclaim(). This patch modifies zone_reclaimable_pages() to account for free pages (NR_FREE_PAGES) when no other reclaimable pages exist. This ensures zones with sufficient free pages are not skipped, enabling proper balancing and reclaim behavior. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20241130164346.436469-1-snishika@redhat.com Link: https://lkml.kernel.org/r/20241130161236.433747-2-snishika@redhat.com Fixes: 5a1c84b404a7 ("mm: remove reclaim and compaction retry approximations") Signed-off-by: Seiji Nishikawa Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1ff2302e8aeac7f2eedb551d7a89617283b5c6b2) --- mm/vmscan.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index a62d3fabc8405..8ca341f9040d2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -641,7 +641,14 @@ unsigned long zone_reclaimable_pages(struct zone *zone) if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL)) nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON); - + /* + * If there are no reclaimable file-backed or anonymous pages, + * ensure zones with sufficient free pages are not skipped. + * This prevents zones like DMA32 from being ignored in reclaim + * scenarios where they can still help alleviate memory pressure. + */ + if (nr == 0) + nr = zone_page_state_snapshot(zone, NR_FREE_PAGES); return nr; } From eed31cfb5ac9fa97380a797e2fe53c1e5925e8f4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 21 Dec 2024 09:51:46 +0100 Subject: [PATCH 207/216] mptcp: fix TCP options overflow. commit cbb26f7d8451fe56ccac802c6db48d16240feebd upstream. 
Syzbot reported the following splat: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 1 UID: 0 PID: 5836 Comm: sshd Not tainted 6.13.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/25/2024 RIP: 0010:_compound_head include/linux/page-flags.h:242 [inline] RIP: 0010:put_page+0x23/0x260 include/linux/mm.h:1552 Code: 90 90 90 90 90 90 90 55 41 57 41 56 53 49 89 fe 48 bd 00 00 00 00 00 fc ff df e8 f8 5e 12 f8 49 8d 5e 08 48 89 d8 48 c1 e8 03 <80> 3c 28 00 74 08 48 89 df e8 8f c7 78 f8 48 8b 1b 48 89 de 48 83 RSP: 0000:ffffc90003916c90 EFLAGS: 00010202 RAX: 0000000000000001 RBX: 0000000000000008 RCX: ffff888030458000 RDX: 0000000000000100 RSI: 0000000000000000 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff898ca81d R09: 1ffff110054414ac R10: dffffc0000000000 R11: ffffed10054414ad R12: 0000000000000007 R13: ffff88802a20a542 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f34f496e800(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f9d6ec9ec28 CR3: 000000004d260000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_page_unref include/linux/skbuff_ref.h:43 [inline] __skb_frag_unref include/linux/skbuff_ref.h:56 [inline] skb_release_data+0x483/0x8a0 net/core/skbuff.c:1119 skb_release_all net/core/skbuff.c:1190 [inline] __kfree_skb+0x55/0x70 net/core/skbuff.c:1204 tcp_clean_rtx_queue net/ipv4/tcp_input.c:3436 [inline] tcp_ack+0x2442/0x6bc0 net/ipv4/tcp_input.c:4032 tcp_rcv_state_process+0x8eb/0x44e0 net/ipv4/tcp_input.c:6805 tcp_v4_do_rcv+0x77d/0xc70 net/ipv4/tcp_ipv4.c:1939 tcp_v4_rcv+0x2dc0/0x37f0 net/ipv4/tcp_ipv4.c:2351 ip_protocol_deliver_rcu+0x22e/0x440 net/ipv4/ip_input.c:205 
ip_local_deliver_finish+0x341/0x5f0 net/ipv4/ip_input.c:233 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 __netif_receive_skb_one_core net/core/dev.c:5672 [inline] __netif_receive_skb+0x2bf/0x650 net/core/dev.c:5785 process_backlog+0x662/0x15b0 net/core/dev.c:6117 __napi_poll+0xcb/0x490 net/core/dev.c:6883 napi_poll net/core/dev.c:6952 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:7074 handle_softirqs+0x2d4/0x9b0 kernel/softirq.c:561 __do_softirq kernel/softirq.c:595 [inline] invoke_softirq kernel/softirq.c:435 [inline] __irq_exit_rcu+0xf7/0x220 kernel/softirq.c:662 irq_exit_rcu+0x9/0x30 kernel/softirq.c:678 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1049 [inline] sysvec_apic_timer_interrupt+0x57/0xc0 arch/x86/kernel/apic/apic.c:1049 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 RIP: 0033:0x7f34f4519ad5 Code: 85 d2 74 0d 0f 10 02 48 8d 54 24 20 0f 11 44 24 20 64 8b 04 25 18 00 00 00 85 c0 75 27 41 b8 08 00 00 00 b8 0f 01 00 00 0f 05 <48> 3d 00 f0 ff ff 76 75 48 8b 15 24 73 0d 00 f7 d8 64 89 02 48 83 RSP: 002b:00007ffec5b32ce0 EFLAGS: 00000246 RAX: 0000000000000001 RBX: 00000000000668a0 RCX: 00007f34f4519ad5 RDX: 00007ffec5b32d00 RSI: 0000000000000004 RDI: 0000564f4bc6cae0 RBP: 0000564f4bc6b5a0 R08: 0000000000000008 R09: 0000000000000000 R10: 00007ffec5b32de8 R11: 0000000000000246 R12: 0000564f48ea8aa4 R13: 0000000000000001 R14: 0000564f48ea93e8 R15: 00007ffec5b32d68 Eric noted a probable shinfo->nr_frags corruption, which indeed occurs. The root cause is a buggy MPTCP option len computation in some circumstances: the ADD_ADDR option should be mutually exclusive with DSS since the blamed commit. Still, mptcp_established_options_add_addr() tries to set the relevant info in mptcp_out_options, if the remaining space is large enough even when DSS is present. Since the ADD_ADDR infos and the DSS share the same union fields, adding first corrupts the latter. 
In the worst-case scenario, such corruption increases the DSS binary layout, exceeding the computed length and possibly overwriting the skb shared info. Address the issue by enforcing mutual exclusion in mptcp_established_options_add_addr(), too. Cc: stable@vger.kernel.org Reported-by: syzbot+38a095a81f30d82884c1@syzkaller.appspotmail.com Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/538 Fixes: 1bff1e43a30e ("mptcp: optimize out option generation") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/025d9df8cde3c9a557befc47e9bc08fbbe3476e5.1734771049.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 53fe947f67c93a5334aed3a7259fcc8a204f8bb6) --- net/mptcp/options.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 2ad9006a157ae..2e1539027e6d3 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -667,8 +667,15 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * &echo, &drop_other_suboptions)) return false; + /* + * Later on, mptcp_write_options() will enforce mutually exclusion with + * DSS, bail out if such option is set and we can't drop it. + */ if (drop_other_suboptions) remaining += opt_size; + else if (opts->suboptions & OPTION_MPTCP_DSS) + return false; + len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port); if (remaining < len) return false; From c1a2ec526fd201cbb39da4c5c6fda4b18e420b24 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 30 Dec 2024 19:12:30 +0100 Subject: [PATCH 208/216] mptcp: fix recvbuffer adjust on sleeping rcvmsg commit 449e6912a2522af672e99992e1201a454910864e upstream. If the recvmsg() blocks after receiving some data - i.e. 
due to SO_RCVLOWAT - the MPTCP code will attempt multiple times to adjust the receive buffer size, wrongly accounting every time the cumulative of received data - instead of accounting only for the delta. Address the issue moving mptcp_rcv_space_adjust just after the data reception and passing it only the just received bytes. This also removes an unneeded difference between the TCP and MPTCP RX code path implementation. Fixes: 581302298524 ("mptcp: error out earlier on disconnect") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241230-net-mptcp-rbuf-fixes-v1-1-8608af434ceb@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 27c843e7644725203e3be7a0260f0d4fac51f906) --- net/mptcp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 01f6ce970918c..ab70d3ca9b214 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1902,6 +1902,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } +static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); + static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, size_t len, int flags, @@ -1955,6 +1957,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, break; } + mptcp_rcv_space_adjust(msk, copied); return copied; } @@ -2231,7 +2234,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } pr_debug("block timeout %ld\n", timeo); - mptcp_rcv_space_adjust(msk, copied); err = sk_wait_data(sk, &timeo, NULL); if (err < 0) { err = copied ? 
: err; @@ -2239,8 +2241,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } - mptcp_rcv_space_adjust(msk, copied); - out_err: if (cmsg_flags && copied >= 0) { if (cmsg_flags & MPTCP_CMSG_TS) From c76a0410c13b1f9c43da0bd4fa81753b2b0dd282 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 30 Dec 2024 19:12:31 +0100 Subject: [PATCH 209/216] mptcp: don't always assume copied data in mptcp_cleanup_rbuf() commit 551844f26da2a9f76c0a698baaffa631d1178645 upstream. Under some corner cases the MPTCP protocol can end-up invoking mptcp_cleanup_rbuf() when no data has been copied, but such helper assumes the opposite condition. Explicitly drop such assumption and performs the costly call only when strictly needed - before releasing the msk socket lock. Fixes: fd8976790a6c ("mptcp: be careful on MPTCP-level ack.") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241230-net-mptcp-rbuf-fixes-v1-2-8608af434ceb@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f61e663d78ff9def3eabad63ae2d64a80513cb82) --- net/mptcp/protocol.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ab70d3ca9b214..07f3a9703312e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -528,13 +528,13 @@ static void mptcp_send_ack(struct mptcp_sock *msk) mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow)); } -static void mptcp_subflow_cleanup_rbuf(struct sock *ssk) +static void mptcp_subflow_cleanup_rbuf(struct sock *ssk, int copied) { bool slow; slow = lock_sock_fast(ssk); if (tcp_can_send_ack(ssk)) - tcp_cleanup_rbuf(ssk, 1); + tcp_cleanup_rbuf(ssk, copied); unlock_sock_fast(ssk, slow); } @@ -551,7 +551,7 @@ static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty) (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED))); } 
-static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) +static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied) { int old_space = READ_ONCE(msk->old_wspace); struct mptcp_subflow_context *subflow; @@ -559,14 +559,14 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) int space = __mptcp_space(sk); bool cleanup, rx_empty; - cleanup = (space > 0) && (space >= (old_space << 1)); - rx_empty = !__mptcp_rmem(sk); + cleanup = (space > 0) && (space >= (old_space << 1)) && copied; + rx_empty = !__mptcp_rmem(sk) && copied; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty)) - mptcp_subflow_cleanup_rbuf(ssk); + mptcp_subflow_cleanup_rbuf(ssk, copied); } } @@ -2183,9 +2183,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - /* be sure to advertise window change */ - mptcp_cleanup_rbuf(msk); - if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) continue; @@ -2234,6 +2231,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } pr_debug("block timeout %ld\n", timeo); + mptcp_cleanup_rbuf(msk, copied); err = sk_wait_data(sk, &timeo, NULL); if (err < 0) { err = copied ? : err; @@ -2241,6 +2239,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } + mptcp_cleanup_rbuf(msk, copied); + out_err: if (cmsg_flags && copied >= 0) { if (cmsg_flags & MPTCP_CMSG_TS) From 4d0d94f9122871d5ea1d3092c3647098cabf519f Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Wed, 4 Dec 2024 13:24:12 +0530 Subject: [PATCH 210/216] RDMA/bnxt_re: Fix max SGEs for the Work Request commit 79d330fbdffd8cee06d8bdf38d82cb62d8363a27 upstream. Gen P7 supports up to 13 SGEs for now. WQE software structure can hold only 6 now. Since the max send sge is reported as 13, the stack can give requests up to 13 SGEs. This is causing traffic failures and system crashes. 
Use the define for max SGE supported for variable size. This will work for both static and variable WQEs. Fixes: 227f51743b61 ("RDMA/bnxt_re: Fix the max WQE size for static WQE support") Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241204075416.478431-2-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3de1b50f055dc2ca7072a526cdda21f691c22dd9) --- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index b64746d484d63..b5c53e864fbb3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -113,7 +113,6 @@ struct bnxt_qplib_sge { u32 size; }; -#define BNXT_QPLIB_QP_MAX_SGL 6 struct bnxt_qplib_swq { u64 wr_id; int next_idx; @@ -153,7 +152,7 @@ struct bnxt_qplib_swqe { #define BNXT_QPLIB_SWQE_FLAGS_UC_FENCE BIT(2) #define BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT BIT(3) #define BNXT_QPLIB_SWQE_FLAGS_INLINE BIT(4) - struct bnxt_qplib_sge sg_list[BNXT_QPLIB_QP_MAX_SGL]; + struct bnxt_qplib_sge sg_list[BNXT_VAR_MAX_SGE]; int num_sge; /* Max inline data is 96 bytes */ u32 inline_len; From cf92b2fd18e421563bbab90774cdb4d5adc0fcc2 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Mon, 22 Jan 2024 14:25:45 +0800 Subject: [PATCH 211/216] scsi: hisi_sas: Remove redundant checks for automatic debugfs dump commit 3f030550476566b12091687c70071d05ad433e0d upstream. In commit 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger"), the memory allocation time of the DFX is changed from device initialization to when a dump occurs, so .debugfs_itct is not a valid address and does not need to be checked. The parameter hisi_sas_debugfs_enable is enough to check whether automatic debugfs dump is triggered, so remove redundant checks.
Fixes: 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger") Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1705904747-62186-3-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9722973ad03833ec4f693883f7f353ef128eb4d6) --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 4ce737ddb058b..f78c5f8a49ffa 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1579,7 +1579,7 @@ static int hisi_sas_controller_prereset(struct hisi_hba *hisi_hba) return -EPERM; } - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) + if (hisi_sas_debugfs_enable) hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); return 0; @@ -1967,7 +1967,7 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct hisi_sas_internal_abort_data *timeout = data; - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + if (hisi_sas_debugfs_enable) { /* * If timeout occurs in device gone scenario, to avoid * circular dependency like: From 2d99986e9300ced91f127ac2ec7df666a428fcc1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Jan 2025 13:32:10 +0100 Subject: [PATCH 212/216] Linux 6.6.70 Link: https://lore.kernel.org/r/20250106151150.585603565@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: SeongJae Park Tested-by: Ron Economos Tested-by: Mark Brown Tested-by: Jon Hunter Tested-by: Hardik Garg Tested-by: Shuah Khan Tested-by: kernelci.org bot Tested-by: Harshit Mogalapalli Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1acb10106df3062d221af9b3124de4d968ee34d2) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 
ec4d74866430e..bf9e6120d2710 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 69 +SUBLEVEL = 70 EXTRAVERSION = NAME = Pinguïn Aangedreven From b5ea06252a35b8bd77c375a49570db0a80c76490 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Jan 2025 13:06:34 +0100 Subject: [PATCH 213/216] Revert "x86/hyperv: Fix hv tsc page based sched_clock for hibernation" This reverts commit 6681113633dc738ec95fe33104843a1e25acef3b which is commit bcc80dec91ee745b3d66f3e48f0ec2efdea97149 upstream. The dependant patch before this one caused build errors in the 6.6.y tree, so revert this for now so that we can fix them up properly. Reported-by: Ignat Korchagin Link: https://lore.kernel.org/r/3DB3A6D3-0D3A-4682-B4FA-407B2D3263B2@cloudflare.com Reported-by: Lars Wendler Link: https://lore.kernel.org/r/20250110103328.0e3906a8@chagall.paradoxon.rec Reported-by: Chris Clayton Link: https://lore.kernel.org/r/10c7be00-b1f8-4389-801b-fb2d0b22468d@googlemail.com Cc: Dexuan Cui Cc: Naman Jain Cc: Michael Kelley Cc: Wei Liu Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c8bc44c5f96172fdaab66a268a53dccc6c7ffa87) --- arch/x86/kernel/cpu/mshyperv.c | 58 ------------------------------ drivers/clocksource/hyperv_timer.c | 14 +------- include/clocksource/hyperv_timer.h | 2 -- 3 files changed, 1 insertion(+), 73 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 6328cf56e59be..93e1cb4f7ff19 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -224,63 +224,6 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hyperv_cleanup(); } #endif /* CONFIG_CRASH_DUMP */ - -static u64 hv_ref_counter_at_suspend; -static void (*old_save_sched_clock_state)(void); -static void (*old_restore_sched_clock_state)(void); - -/* - * Hyper-V clock counter resets during hibernation. 
Save and restore clock - * offset during suspend/resume, while also considering the time passed - * before suspend. This is to make sure that sched_clock using hv tsc page - * based clocksource, proceeds from where it left off during suspend and - * it shows correct time for the timestamps of kernel messages after resume. - */ -static void save_hv_clock_tsc_state(void) -{ - hv_ref_counter_at_suspend = hv_read_reference_counter(); -} - -static void restore_hv_clock_tsc_state(void) -{ - /* - * Adjust the offsets used by hv tsc clocksource to - * account for the time spent before hibernation. - * adjusted value = reference counter (time) at suspend - * - reference counter (time) now. - */ - hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); -} - -/* - * Functions to override save_sched_clock_state and restore_sched_clock_state - * functions of x86_platform. The Hyper-V clock counter is reset during - * suspend-resume and the offset used to measure time needs to be - * corrected, post resume. 
- */ -static void hv_save_sched_clock_state(void) -{ - old_save_sched_clock_state(); - save_hv_clock_tsc_state(); -} - -static void hv_restore_sched_clock_state(void) -{ - restore_hv_clock_tsc_state(); - old_restore_sched_clock_state(); -} - -static void __init x86_setup_ops_for_tsc_pg_clock(void) -{ - if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) - return; - - old_save_sched_clock_state = x86_platform.save_sched_clock_state; - x86_platform.save_sched_clock_state = hv_save_sched_clock_state; - - old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; - x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; -} #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -635,7 +578,6 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); - x86_setup_ops_for_tsc_pg_clock(); hv_vtl_init_platform(); #endif /* diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 5eec1457e1396..8ff7cd4e20bb1 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -27,8 +27,7 @@ #include static struct clock_event_device __percpu *hv_clock_event; -/* Note: offset can hold negative values after hibernation. */ -static u64 hv_sched_clock_offset __read_mostly; +static u64 hv_sched_clock_offset __ro_after_init; /* * If false, we're using the old mechanism for stimer0 interrupts @@ -457,17 +456,6 @@ static void resume_hv_clock_tsc(struct clocksource *arg) hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); } -/* - * Called during resume from hibernation, from overridden - * x86_platform.restore_sched_clock_state routine. This is to adjust offsets - * used to calculate time for hv tsc page based sched_clock, to account for - * time spent before hibernation. 
- */ -void hv_adj_sched_clock_offset(u64 offset) -{ - hv_sched_clock_offset -= offset; -} - #ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK static int hv_cs_enable(struct clocksource *cs) { diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index aa5233b1eba97..6cdc873ac907f 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -38,8 +38,6 @@ extern void hv_remap_tsc_clocksource(void); extern unsigned long hv_get_tsc_pfn(void); extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -extern void hv_adj_sched_clock_offset(u64 offset); - static __always_inline bool hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc, u64 *time) From 4c94b87c33e0f9febd41a03cfbead569e4968fea Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Jan 2025 13:09:11 +0100 Subject: [PATCH 214/216] Revert "x86, crash: wrap crash dumping code into crash related ifdefs" This reverts commit e5b1574a8ca28c40cf53eda43f6c3b016ed41e27 which is commit a4eeb2176d89fdf2785851521577b94b31690a60 upstream. When this change is backported to the 6.6.y tree, it can cause build errors on some configurations when KEXEC is not enabled, so revert it for now. Reported-by: Ignat Korchagin Link: https://lore.kernel.org/r/3DB3A6D3-0D3A-4682-B4FA-407B2D3263B2@cloudflare.com Reported-by: Lars Wendler Link: https://lore.kernel.org/r/20250110103328.0e3906a8@chagall.paradoxon.rec Reported-by: Chris Clayton Link: https://lore.kernel.org/r/10c7be00-b1f8-4389-801b-fb2d0b22468d@googlemail.com Cc: Al Viro Cc: Andrew Morton Cc: Dexuan Cui Cc: Eric W. 
Biederman Cc: Hari Bathini Cc: Klara Modin Cc: Michael Kelley Cc: Michael Kelley Cc: Naman Jain Cc: Nathan Chancellor Cc: Pingfan Liu Cc: Sasha Levin Cc: Stephen Rothwell Cc: Wei Liu Cc: Yang Li Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b34e805539dabbebfa6030842f4a0ba14de8f813) --- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/cpu/mshyperv.c | 10 ++-------- arch/x86/kernel/kexec-bzimage64.c | 4 ---- arch/x86/kernel/kvm.c | 4 ++-- arch/x86/kernel/machine_kexec_64.c | 3 --- arch/x86/kernel/reboot.c | 4 ++-- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/xen/enlighten_hvm.c | 4 ---- arch/x86/xen/mmu_pv.c | 2 +- 10 files changed, 11 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 84cb0c72b38e8..70ef205fc1601 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -99,9 +99,9 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o -obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o +obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o -obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_X86_32) += doublefault_32.o diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 93e1cb4f7ff19..bcb2d640a0cd8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -209,9 +209,7 @@ static void hv_machine_shutdown(void) if (kexec_in_progress) hyperv_cleanup(); } -#endif /* CONFIG_KEXEC_CORE */ -#ifdef CONFIG_CRASH_DUMP static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) @@ -223,7 +221,7 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) /* Disable the hypercall page when there is only 
1 active CPU. */ hyperv_cleanup(); } -#endif /* CONFIG_CRASH_DUMP */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -495,13 +493,9 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif -#if IS_ENABLED(CONFIG_HYPERV) -#if defined(CONFIG_KEXEC_CORE) +#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE) machine_ops.shutdown = hv_machine_shutdown; -#endif -#if defined(CONFIG_CRASH_DUMP) machine_ops.crash_shutdown = hv_machine_crash_shutdown; -#endif #endif if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { /* diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 0de509c02d18b..a61c12c012709 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -263,13 +263,11 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, memset(&params->hd0_info, 0, sizeof(params->hd0_info)); memset(&params->hd1_info, 0, sizeof(params->hd1_info)); -#ifdef CONFIG_CRASH_DUMP if (image->type == KEXEC_TYPE_CRASH) { ret = crash_setup_memmap_entries(image, params); if (ret) return ret; } else -#endif setup_e820_entries(params); nr_e820_entries = params->e820_entries; @@ -430,14 +428,12 @@ static void *bzImage64_load(struct kimage *image, char *kernel, return ERR_PTR(-EINVAL); } -#ifdef CONFIG_CRASH_DUMP /* Allocate and load backup region */ if (image->type == KEXEC_TYPE_CRASH) { ret = crash_load_segments(image); if (ret) return ERR_PTR(ret); } -#endif /* * Load purgatory. For 64bit entry point, purgatory code can be diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 38d88c8b56ec0..b8ab9ee5896c1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -769,7 +769,7 @@ static struct notifier_block kvm_pv_reboot_nb = { * won't be valid. In cases like kexec, in which you install a new kernel, this * means a random memory location will be kept being written.
*/ -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_KEXEC_CORE static void kvm_crash_shutdown(struct pt_regs *regs) { kvm_guest_cpu_offline(true); @@ -852,7 +852,7 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_KEXEC_CORE machine_ops.crash_shutdown = kvm_crash_shutdown; #endif diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index aaeac2deb85dc..2fa12d1dc6760 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -545,8 +545,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif /* CONFIG_KEXEC_FILE */ -#ifdef CONFIG_CRASH_DUMP - static int kexec_mark_range(unsigned long start, unsigned long end, bool protect) { @@ -591,7 +589,6 @@ void arch_kexec_unprotect_crashkres(void) { kexec_mark_crashkres(false); } -#endif /* * During a traditional boot under SME, SME will encrypt the kernel, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f3130f762784a..830425e6d38e2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -796,7 +796,7 @@ struct machine_ops machine_ops __ro_after_init = { .emergency_restart = native_machine_emergency_restart, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_KEXEC_CORE .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -826,7 +826,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_KEXEC_CORE void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b66f909bf1cd1..44148dcfcec2c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -548,7 +548,7 @@ static void __init reserve_crashkernel(void) bool high = false; int ret; - if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; total_mem = 
memblock_phys_mem_size(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 52c3823b72119..96a771f9f930a 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -282,7 +282,7 @@ struct smp_ops smp_ops = { .smp_cpus_done = native_smp_cpus_done, .stop_other_cpus = native_stop_other_cpus, -#if defined(CONFIG_CRASH_DUMP) +#if defined(CONFIG_KEXEC_CORE) .crash_stop_other_cpus = kdump_nmi_shootdown_cpus, #endif .smp_send_reschedule = native_smp_send_reschedule, diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index ade22feee7aeb..70be57e8f51ca 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -141,9 +141,7 @@ static void xen_hvm_shutdown(void) if (kexec_in_progress) xen_reboot(SHUTDOWN_soft_reset); } -#endif -#ifdef CONFIG_CRASH_DUMP static void xen_hvm_crash_shutdown(struct pt_regs *regs) { native_machine_crash_shutdown(regs); @@ -231,8 +229,6 @@ static void __init xen_hvm_guest_init(void) #ifdef CONFIG_KEXEC_CORE machine_ops.shutdown = xen_hvm_shutdown; -#endif -#ifdef CONFIG_CRASH_DUMP machine_ops.crash_shutdown = xen_hvm_crash_shutdown; #endif } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index bfd57d07f4b5e..6b201e64d8abc 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2517,7 +2517,7 @@ int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL_GPL(xen_remap_pfn); -#ifdef CONFIG_VMCORE_INFO +#ifdef CONFIG_KEXEC_CORE phys_addr_t paddr_vmcoreinfo_note(void) { if (xen_pv_domain()) From 91cc2a7172509b353d9bbb1291e543e3169bc0f8 Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Tue, 17 Sep 2024 11:09:17 +0530 Subject: [PATCH 215/216] x86/hyperv: Fix hv tsc page based sched_clock for hibernation commit bcc80dec91ee745b3d66f3e48f0ec2efdea97149 upstream. 
read_hv_sched_clock_tsc() assumes that the Hyper-V clock counter is bigger than the variable hv_sched_clock_offset, which is cached during early boot, but depending on the timing this assumption may be false when a hibernated VM starts again (the clock counter starts from 0 again) and is resuming back (Note: hv_init_tsc_clocksource() is not called during hibernation/resume); consequently, read_hv_sched_clock_tsc() may return a negative integer (which is interpreted as a huge positive integer since the return type is u64) and new kernel messages are prefixed with huge timestamps before read_hv_sched_clock_tsc() grows big enough (which typically takes several seconds). Fix the issue by saving the Hyper-V clock counter just before the suspend, and using it to correct the hv_sched_clock_offset in resume. This makes hv tsc page based sched_clock continuous and ensures that post resume, it starts from where it left off during suspend. Override x86_platform.save_sched_clock_state and x86_platform.restore_sched_clock_state routines to correct this as soon as possible. Note: if Invariant TSC is available, the issue doesn't happen because 1) we don't register read_hv_sched_clock_tsc() for sched clock: See commit e5313f1c5404 ("clocksource/drivers/hyper-v: Rework clocksource and sched clock setup"); 2) the common x86 code adjusts TSC similarly: see __restore_processor_state() -> tsc_verify_tsc_adjust(true) and x86_platform.restore_sched_clock_state(). 
Cc: stable@vger.kernel.org Fixes: 1349401ff1aa ("clocksource/drivers/hyper-v: Suspend/resume Hyper-V clocksource for hibernation") Co-developed-by: Dexuan Cui Signed-off-by: Dexuan Cui Signed-off-by: Naman Jain Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240917053917.76787-1-namjain@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20240917053917.76787-1-namjain@linux.microsoft.com> Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a6923798e471570ac1b24086be0a9679f51c3171) --- arch/x86/kernel/cpu/mshyperv.c | 58 ++++++++++++++++++++++++++++++ drivers/clocksource/hyperv_timer.c | 14 +++++++- include/clocksource/hyperv_timer.h | 2 ++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index bcb2d640a0cd8..5ae77d966cafe 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -222,6 +222,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hyperv_cleanup(); } #endif /* CONFIG_KEXEC_CORE */ + +static u64 hv_ref_counter_at_suspend; +static void (*old_save_sched_clock_state)(void); +static void (*old_restore_sched_clock_state)(void); + +/* + * Hyper-V clock counter resets during hibernation. Save and restore clock + * offset during suspend/resume, while also considering the time passed + * before suspend. This is to make sure that sched_clock using hv tsc page + * based clocksource, proceeds from where it left off during suspend and + * it shows correct time for the timestamps of kernel messages after resume. + */ +static void save_hv_clock_tsc_state(void) +{ + hv_ref_counter_at_suspend = hv_read_reference_counter(); +} + +static void restore_hv_clock_tsc_state(void) +{ + /* + * Adjust the offsets used by hv tsc clocksource to + * account for the time spent before hibernation. + * adjusted value = reference counter (time) at suspend + * - reference counter (time) now. 
+ */ + hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); +} + +/* + * Functions to override save_sched_clock_state and restore_sched_clock_state + * functions of x86_platform. The Hyper-V clock counter is reset during + * suspend-resume and the offset used to measure time needs to be + * corrected, post resume. + */ +static void hv_save_sched_clock_state(void) +{ + old_save_sched_clock_state(); + save_hv_clock_tsc_state(); +} + +static void hv_restore_sched_clock_state(void) +{ + restore_hv_clock_tsc_state(); + old_restore_sched_clock_state(); +} + +static void __init x86_setup_ops_for_tsc_pg_clock(void) +{ + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return; + + old_save_sched_clock_state = x86_platform.save_sched_clock_state; + x86_platform.save_sched_clock_state = hv_save_sched_clock_state; + + old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; + x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; +} #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -572,6 +629,7 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); + x86_setup_ops_for_tsc_pg_clock(); hv_vtl_init_platform(); #endif /* diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 8ff7cd4e20bb1..5eec1457e1396 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -27,7 +27,8 @@ #include static struct clock_event_device __percpu *hv_clock_event; -static u64 hv_sched_clock_offset __ro_after_init; +/* Note: offset can hold negative values after hibernation. 
*/ +static u64 hv_sched_clock_offset __read_mostly; /* * If false, we're using the old mechanism for stimer0 interrupts @@ -456,6 +457,17 @@ static void resume_hv_clock_tsc(struct clocksource *arg) hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); } +/* + * Called during resume from hibernation, from overridden + * x86_platform.restore_sched_clock_state routine. This is to adjust offsets + * used to calculate time for hv tsc page based sched_clock, to account for + * time spent before hibernation. + */ +void hv_adj_sched_clock_offset(u64 offset) +{ + hv_sched_clock_offset -= offset; +} + #ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK static int hv_cs_enable(struct clocksource *cs) { diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index 6cdc873ac907f..aa5233b1eba97 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -38,6 +38,8 @@ extern void hv_remap_tsc_clocksource(void); extern unsigned long hv_get_tsc_pfn(void); extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); +extern void hv_adj_sched_clock_offset(u64 offset); + static __always_inline bool hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc, u64 *time) From 277827dbbcec59394b334c59f65694a482883993 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Jan 2025 14:31:36 +0100 Subject: [PATCH 216/216] Linux 6.6.71 Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 843e64492a7ed11436cc5c9bbfba46835939071a) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bf9e6120d2710..47e281657892c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 70 +SUBLEVEL = 71 EXTRAVERSION = NAME = Pinguïn Aangedreven