tree: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git habanalabs-next
head: 3831e4448b36c1d2b9b326e9580ce4433cc28e30
commit: 8d89f93c080c601ffb031f59244ee8dda8344543 [52/64] habanalabs/gaudi: fetch HBM ecc info from FW
config: ia64-randconfig-r002-20201119 (attached as .config)
compiler: ia64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git/commit/...
git remote add ogabbay https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
git fetch --no-tags ogabbay habanalabs-next
git checkout 8d89f93c080c601ffb031f59244ee8dda8344543
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=ia64
If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
drivers/misc/habanalabs/gaudi/gaudi.c: In function 'gaudi_hbm_read_interrupts':
>> drivers/misc/habanalabs/gaudi/gaudi.c:6845:59: warning: variable 'type' set but not used [-Wunused-but-set-variable]
6845 | u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
     |                                                          ^~~~
drivers/misc/habanalabs/gaudi/gaudi.c: At top level:
drivers/misc/habanalabs/gaudi/gaudi.c:7895:6: warning: no previous prototype for 'gaudi_ctx_fini' [-Wmissing-prototypes]
7895 | void gaudi_ctx_fini(struct hl_ctx *ctx)
     |      ^~~~~~~~~~~~~~
vim +/type +6845 drivers/misc/habanalabs/gaudi/gaudi.c
6841
6842 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
6843 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
6844 {
6845 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
6846 int err = 0;
6847
6848 if (!hdev->asic_prop.fw_security_disabled) {
6849 if (!hbm_ecc_data) {
6850 dev_err(hdev->dev, "No FW ECC data");
6851 return 0;
6852 }
6853
6854 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
6855 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6856 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
6857 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6858 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
6859 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6860 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
6861 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6862 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
6863 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6864 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
6865 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6866 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
6867 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6868
6869 dev_err(hdev->dev,
6870 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6871 device, ch, wr_par, rd_par, ca_par, serr, derr);
6872
6873 err = 1;
6874
6875 return 0;
6876 }
6877
6878 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
6879 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
6880 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
6881 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6882 if (val) {
6883 err = 1;
6884 dev_err(hdev->dev,
6885 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6886 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
6887 (val >> 2) & 0x1, (val >> 3) & 0x1,
6888 (val >> 4) & 0x1);
6889
6890 val2 = RREG32(base + ch * 0x1000 + 0x060);
6891 dev_err(hdev->dev,
6892 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
6893 device, ch * 2,
6894 RREG32(base + ch * 0x1000 + 0x064),
6895 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6896 (val2 & 0xFF0000) >> 16,
6897 (val2 & 0xFF000000) >> 24);
6898 }
6899
6900 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
6901 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6902 if (val) {
6903 err = 1;
6904 dev_err(hdev->dev,
6905 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6906 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
6907 (val >> 2) & 0x1, (val >> 3) & 0x1,
6908 (val >> 4) & 0x1);
6909
6910 val2 = RREG32(base + ch * 0x1000 + 0x070);
6911 dev_err(hdev->dev,
6912 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
6913 device, ch * 2 + 1,
6914 RREG32(base + ch * 0x1000 + 0x074),
6915 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6916 (val2 & 0xFF0000) >> 16,
6917 (val2 & 0xFF000000) >> 24);
6918 }
6919
6920 /* Clear interrupts */
6921 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
6922 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
6923 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
6924 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
6925 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
6926 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
6927 }
6928
6929 val = RREG32(base + 0x8F30);
6930 val2 = RREG32(base + 0x8F34);
6931 if (val | val2) {
6932 err = 1;
6933 dev_err(hdev->dev,
6934 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
6935 device, val, val2);
6936 }
6937 val = RREG32(base + 0x8F40);
6938 val2 = RREG32(base + 0x8F44);
6939 if (val | val2) {
6940 err = 1;
6941 dev_err(hdev->dev,
6942 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
6943 device, val, val2);
6944 }
6945
6946 return err;
6947 }
6948
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org