diff options
Diffstat (limited to '0001-Renaming-cupti_common.-to-papi_cupti_common.-to-avoi.patch')
-rw-r--r-- | 0001-Renaming-cupti_common.-to-papi_cupti_common.-to-avoi.patch | 868 |
1 files changed, 868 insertions, 0 deletions
diff --git a/0001-Renaming-cupti_common.-to-papi_cupti_common.-to-avoi.patch b/0001-Renaming-cupti_common.-to-papi_cupti_common.-to-avoi.patch new file mode 100644 index 000000000000..9cd05605b675 --- /dev/null +++ b/0001-Renaming-cupti_common.-to-papi_cupti_common.-to-avoi.patch @@ -0,0 +1,868 @@ +From 0207d4bf093674d9598509c917e7a22a09796b52 Mon Sep 17 00:00:00 2001 +From: Treece Burgess <tburgess@leconte.cluster> +Date: Tue, 16 Jul 2024 17:51:08 +0000 +Subject: [PATCH 1/2] Renaming cupti_common.* to papi_cupti_common.* to avoid + file name collision with NVIDIA. Fixes build issues in Cuda >= 12.4. + +--- + src/components/cuda/Rules.cuda | 8 +- + src/components/cuda/cupti_common.c | 658 --------------------------- + src/components/cuda/cupti_common.h | 110 ----- + src/components/cuda/cupti_dispatch.c | 2 +- + src/components/cuda/cupti_events.c | 2 +- + src/components/cuda/cupti_profiler.c | 2 +- + 6 files changed, 7 insertions(+), 775 deletions(-) + delete mode 100644 src/components/cuda/cupti_common.c + delete mode 100644 src/components/cuda/cupti_common.h + +diff --git a/src/components/cuda/Rules.cuda b/src/components/cuda/Rules.cuda +index 8656cf468..f8935cdd8 100644 +--- a/src/components/cuda/Rules.cuda ++++ b/src/components/cuda/Rules.cuda +@@ -8,11 +8,11 @@ CUDA_MACS+= -DPAPI_CUDA_CUPTI=$(PAPI_CUDA_CUPTI) -DPAPI_CUDA_PERFWORKS=$(PAPI_CU + COMPSRCS += components/cuda/linux-cuda.c \ + components/cuda/cupti_dispatch.c \ + components/cuda/cupti_utils.c \ +- components/cuda/cupti_common.c \ ++ components/cuda/papi_cupti_common.c \ + components/cuda/cupti_profiler.c \ + components/cuda/cupti_events.c \ + +-COMPOBJS += linux-cuda.o cupti_dispatch.o cupti_utils.o cupti_common.o cupti_profiler.o cupti_events.o ++COMPOBJS += linux-cuda.o cupti_dispatch.o cupti_utils.o papi_cupti_common.o cupti_profiler.o cupti_events.o + + # CFLAGS specifies compile flags; need include files here, and macro defines. + CFLAGS += -I$(PAPI_CUDA_ROOT)/include -I$(PAPI_CUDA_ROOT)/extras/CUPTI/include -g $(CUDA_MACS) +@@ -27,8 +27,8 @@ cupti_dispatch.o: components/cuda/cupti_dispatch.c + cupti_utils.o: components/cuda/cupti_utils.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/cupti_utils.c -o cupti_utils.o + +-cupti_common.o: components/cuda/cupti_common.c +- $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/cupti_common.c -o cupti_common.o ++papi_cupti_common.o: components/cuda/papi_cupti_common.c ++ $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/papi_cupti_common.c -o papi_cupti_common.o + + cupti_profiler.o: components/cuda/cupti_profiler.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/cupti_profiler.c -o cupti_profiler.o +diff --git a/src/components/cuda/cupti_common.c b/src/components/cuda/cupti_common.c +deleted file mode 100644 +index a7d7de52c..000000000 +--- a/src/components/cuda/cupti_common.c ++++ /dev/null +@@ -1,658 +0,0 @@ +-/** +- * @file cupti_common.c +- * @author Anustuv Pal +- * anustuv@icl.utk.edu +- */ +- +-#include <dlfcn.h> +-#include <link.h> +-#include <libgen.h> +-#include <papi.h> +-#include "papi_memory.h" +- +-#include "cupti_config.h" +-#include "cupti_common.h" +- +-static void *dl_drv, *dl_rt; +- +-const char *linked_cudart_path; +-void *dl_cupti; +- +-unsigned int _cuda_lock; +- +-CUresult ( *cuCtxGetCurrentPtr ) (CUcontext *); +-CUresult ( *cuCtxSetCurrentPtr ) (CUcontext); +-CUresult ( *cuCtxDestroyPtr ) (CUcontext); +-CUresult ( *cuCtxCreatePtr ) (CUcontext *pctx, unsigned int flags, CUdevice dev); +-CUresult ( *cuCtxGetDevicePtr ) (CUdevice *); +-CUresult ( *cuDeviceGetPtr ) (CUdevice *, int); +-CUresult ( *cuDeviceGetCountPtr ) (int *); +-CUresult ( *cuDeviceGetNamePtr ) (char *, int, CUdevice); +-CUresult ( *cuDevicePrimaryCtxRetainPtr ) (CUcontext *pctx, CUdevice); +-CUresult ( *cuDevicePrimaryCtxReleasePtr ) (CUdevice); +-CUresult ( *cuInitPtr ) (unsigned int); +-CUresult ( *cuGetErrorStringPtr ) (CUresult error, const char** pStr); +-CUresult ( *cuCtxPopCurrentPtr ) (CUcontext * pctx); +-CUresult ( *cuCtxPushCurrentPtr ) (CUcontext pctx); +-CUresult ( *cuCtxSynchronizePtr ) (); +-CUresult ( *cuDeviceGetAttributePtr ) (int *, CUdevice_attribute, CUdevice); +- +-cudaError_t ( *cudaGetDeviceCountPtr ) (int *); +-cudaError_t ( *cudaGetDevicePtr ) (int *); +-const char *( *cudaGetErrorStringPtr ) (cudaError_t); +-cudaError_t ( *cudaSetDevicePtr ) (int); +-cudaError_t ( *cudaGetDevicePropertiesPtr ) (struct cudaDeviceProp* prop, int device); +-cudaError_t ( *cudaDeviceGetAttributePtr ) (int *value, enum cudaDeviceAttr attr, int device); +-cudaError_t ( *cudaFreePtr ) (void *); +-cudaError_t ( *cudaDriverGetVersionPtr ) (int *); +-cudaError_t ( *cudaRuntimeGetVersionPtr ) (int *); +- +-CUptiResult ( *cuptiGetVersionPtr ) (uint32_t* ); +- +-static int load_cuda_sym(void) +-{ +- dl_drv = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); +- if (!dl_drv) { +- ERRDBG("Loading installed libcuda.so failed. Check that cuda drivers are installed.\n"); +- goto fn_fail; +- } +- +- cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxSetCurrent"); +- cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetCurrent"); +- cuCtxDestroyPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxDestroy"); +- cuCtxCreatePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxCreate"); +- cuCtxGetDevicePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetDevice"); +- cuDeviceGetPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGet"); +- cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetCount"); +- cuDeviceGetNamePtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetName"); +- cuDevicePrimaryCtxRetainPtr = DLSYM_AND_CHECK(dl_drv, "cuDevicePrimaryCtxRetain"); +- cuDevicePrimaryCtxReleasePtr = DLSYM_AND_CHECK(dl_drv, "cuDevicePrimaryCtxRelease"); +- cuInitPtr = DLSYM_AND_CHECK(dl_drv, "cuInit"); +- cuGetErrorStringPtr = DLSYM_AND_CHECK(dl_drv, "cuGetErrorString"); +- cuCtxPopCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxPopCurrent"); +- cuCtxPushCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxPushCurrent"); +- cuCtxSynchronizePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxSynchronize"); +- cuDeviceGetAttributePtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetAttribute"); +- +- Dl_info info; +- dladdr(cuCtxSetCurrentPtr, &info); +- LOGDBG("CUDA driver library loaded from %s\n", info.dli_fname); +- return PAPI_OK; +-fn_fail: +- return PAPI_EMISC; +-} +- +-static int unload_cuda_sym(void) +-{ +- if (dl_drv) { +- dlclose(dl_drv); +- dl_drv = NULL; +- } +- cuCtxSetCurrentPtr = NULL; +- cuCtxGetCurrentPtr = NULL; +- cuCtxDestroyPtr = NULL; +- cuCtxCreatePtr = NULL; +- cuCtxGetDevicePtr = NULL; +- cuDeviceGetPtr = NULL; +- cuDeviceGetCountPtr = NULL; +- cuDeviceGetNamePtr = NULL; +- cuDevicePrimaryCtxRetainPtr = NULL; +- cuDevicePrimaryCtxReleasePtr = NULL; +- cuInitPtr = NULL; +- cuGetErrorStringPtr = NULL; +- cuCtxPopCurrentPtr = NULL; +- cuCtxPushCurrentPtr = NULL; +- cuCtxSynchronizePtr = NULL; +- cuDeviceGetAttributePtr = NULL; +- return PAPI_OK; +-} +- +-void *cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[]) +-{ +- void *dl = NULL; +- char lookup_path[PATH_MAX]; +- char *found_files[CUPTIU_MAX_FILES]; +- int i, count; +- for (i = 0; search_subpaths[i] != NULL; i++) { +- sprintf(lookup_path, search_subpaths[i], parent_path, dlname); +- dl = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL); +- if (dl) { +- return dl; +- } +- } +- count = cuptiu_files_search_in_path(dlname, parent_path, found_files); +- for (i = 0; i < count; i++) { +- dl = dlopen(found_files[i], RTLD_NOW | RTLD_GLOBAL); +- if (dl) { +- break; +- } +- } +- for (i = 0; i < count; i++) { +- papi_free(found_files[i]); +- } +- return dl; +-} +- +-static int load_cudart_sym(void) +-{ +- char dlname[] = "libcudart.so"; +- char lookup_path[PATH_MAX]; +- +- char *papi_cuda_runtime = getenv("PAPI_CUDA_RUNTIME"); +- if (papi_cuda_runtime) { +- sprintf(lookup_path, "%s/%s", papi_cuda_runtime, dlname); +- dl_rt = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL); +- } +- +- const char *standard_paths[] = { +- "%s/lib64/%s", +- NULL, +- }; +- +- if (linked_cudart_path && !dl_rt) { +- dl_rt = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths); +- } +- +- char *papi_cuda_root = getenv("PAPI_CUDA_ROOT"); +- if (papi_cuda_root && !dl_rt) { +- dl_rt = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths); +- } +- +- if (!dl_rt) { +- dl_rt = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL); +- if (!dl_rt) { +- ERRDBG("Loading libcudart.so failed. Try setting PAPI_CUDA_ROOT\n"); +- goto fn_fail; +- } +- } +- +- cudaGetDevicePtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDevice"); +- cudaGetDeviceCountPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDeviceCount"); +- cudaGetDevicePropertiesPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetDeviceProperties"); +- cudaGetErrorStringPtr = DLSYM_AND_CHECK(dl_rt, "cudaGetErrorString"); +- cudaDeviceGetAttributePtr = DLSYM_AND_CHECK(dl_rt, "cudaDeviceGetAttribute"); +- cudaSetDevicePtr = DLSYM_AND_CHECK(dl_rt, "cudaSetDevice"); +- cudaFreePtr = DLSYM_AND_CHECK(dl_rt, "cudaFree"); +- cudaDriverGetVersionPtr = DLSYM_AND_CHECK(dl_rt, "cudaDriverGetVersion"); +- cudaRuntimeGetVersionPtr = DLSYM_AND_CHECK(dl_rt, "cudaRuntimeGetVersion"); +- +- Dl_info info; +- dladdr(cudaGetDevicePtr, &info); +- LOGDBG("CUDA runtime library loaded from %s\n", info.dli_fname); +- return PAPI_OK; +-fn_fail: +- return PAPI_EMISC; +-} +- +-static int unload_cudart_sym(void) +-{ +- if (dl_rt) { +- dlclose(dl_rt); +- dl_rt = NULL; +- } +- cudaGetDevicePtr = NULL; +- cudaGetDeviceCountPtr = NULL; +- cudaGetDevicePropertiesPtr = NULL; +- cudaGetErrorStringPtr = NULL; +- cudaDeviceGetAttributePtr = NULL; +- cudaSetDevicePtr = NULL; +- cudaFreePtr = NULL; +- cudaDriverGetVersionPtr = NULL; +- cudaRuntimeGetVersionPtr = NULL; +- return PAPI_OK; +-} +- +-static int load_cupti_common_sym(void) +-{ +- char dlname[] = "libcupti.so"; +- char lookup_path[PATH_MAX]; +- +- char *papi_cuda_cupti = getenv("PAPI_CUDA_CUPTI"); +- if (papi_cuda_cupti) { +- sprintf(lookup_path, "%s/%s", papi_cuda_cupti, dlname); +- dl_cupti = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL); +- } +- +- const char *standard_paths[] = { +- "%s/extras/CUPTI/lib64/%s", +- "%s/lib64/%s", +- NULL, +- }; +- +- if (linked_cudart_path && !dl_cupti) { +- dl_cupti = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths); +- } +- +- char *papi_cuda_root = getenv("PAPI_CUDA_ROOT"); +- if (papi_cuda_root && !dl_cupti) { +- dl_cupti = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths); +- } +- +- if (!dl_cupti) { +- dl_cupti = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL); +- if (!dl_cupti) { +- ERRDBG("Loading libcupti.so failed. Try setting PAPI_CUDA_ROOT\n"); +- goto fn_fail; +- } +- } +- +- cuptiGetVersionPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiGetVersion"); +- +- Dl_info info; +- dladdr(cuptiGetVersionPtr, &info); +- LOGDBG("CUPTI library loaded from %s\n", info.dli_fname); +- return PAPI_OK; +-fn_fail: +- return PAPI_EMISC; +-} +- +-static int unload_cupti_common_sym(void) +-{ +- if (dl_cupti) { +- dlclose(dl_cupti); +- dl_cupti = NULL; +- } +- cuptiGetVersionPtr = NULL; +- return PAPI_OK; +-} +- +-static int util_load_cuda_sym(void) +-{ +- int papi_errno; +- papi_errno = load_cuda_sym(); +- papi_errno += load_cudart_sym(); +- papi_errno += load_cupti_common_sym(); +- if (papi_errno != PAPI_OK) { +- return PAPI_EMISC; +- } +- else +- return PAPI_OK; +-} +- +-static void unload_linked_cudart_path(void) +-{ +- if (linked_cudart_path) { +- papi_free((void*) linked_cudart_path); +- linked_cudart_path = NULL; +- } +-} +- +-int cuptic_shutdown(void) +-{ +- unload_cuda_sym(); +- unload_cudart_sym(); +- unload_cupti_common_sym(); +- unload_linked_cudart_path(); +- return PAPI_OK; +-} +- +-static int util_dylib_cu_runtime_version(void) +-{ +- int runtimeVersion; +- CUDART_CALL(cudaRuntimeGetVersionPtr(&runtimeVersion), return PAPI_EMISC ); +- return runtimeVersion; +-} +- +-static int util_dylib_cupti_version(void) +-{ +- unsigned int cuptiVersion; +- CUPTI_CALL(cuptiGetVersionPtr(&cuptiVersion), return PAPI_EMISC ); +- return cuptiVersion; +-} +- +-int cuptic_device_get_count(int *num_gpus) +-{ +- cudaError_t cuda_errno = cudaGetDeviceCountPtr(num_gpus); +- if (cuda_errno != cudaSuccess) { +- cuptic_disabled_reason_set(cudaGetErrorStringPtr(cuda_errno)); +- return PAPI_EMISC; +- } +- return PAPI_OK; +-} +- +-static int get_gpu_compute_capability(int dev_num, int *cc) +-{ +- int cc_major, cc_minor; +- cudaError_t cuda_errno; +- cuda_errno = cudaDeviceGetAttributePtr(&cc_major, cudaDevAttrComputeCapabilityMajor, dev_num); +- if (cuda_errno != cudaSuccess) { +- cuptic_disabled_reason_set(cudaGetErrorStringPtr(cuda_errno)); +- return PAPI_EMISC; +- } +- cuda_errno = cudaDeviceGetAttributePtr(&cc_minor, cudaDevAttrComputeCapabilityMinor, dev_num); +- if (cuda_errno != cudaSuccess) { +- cuptic_disabled_reason_set(cudaGetErrorStringPtr(cuda_errno)); +- return PAPI_EMISC; +- } +- *cc = cc_major * 10 + cc_minor; +- return PAPI_OK; +-} +- +-typedef enum {GPU_COLLECTION_UNKNOWN, GPU_COLLECTION_ALL_PERF, GPU_COLLECTION_MIXED, GPU_COLLECTION_ALL_EVENTS, GPU_COLLECTION_ALL_CC70} gpu_collection_e; +- +-static int util_gpu_collection_kind(gpu_collection_e *coll_kind) +-{ +- int papi_errno = PAPI_OK; +- static gpu_collection_e kind = GPU_COLLECTION_UNKNOWN; +- if (kind != GPU_COLLECTION_UNKNOWN) { +- goto fn_exit; +- } +- +- int total_gpus; +- papi_errno = cuptic_device_get_count(&total_gpus); +- if (papi_errno != PAPI_OK) { +- goto fn_exit; +- } +- +- int i, cc; +- int count_perf = 0, count_evt = 0, count_cc70 = 0; +- for (i=0; i<total_gpus; i++) { +- papi_errno = get_gpu_compute_capability(i, &cc); +- if (papi_errno != PAPI_OK) { +- return papi_errno; +- } +- if (cc == 70) { +- ++count_cc70; +- } +- if (cc >= 70) { +- ++count_perf; +- } +- if (cc <= 70) { +- ++count_evt; +- } +- } +- if (count_cc70 == total_gpus) { +- kind = GPU_COLLECTION_ALL_CC70; +- goto fn_exit; +- } +- if (count_perf == total_gpus) { +- kind = GPU_COLLECTION_ALL_PERF; +- goto fn_exit; +- } +- if (count_evt == total_gpus) { +- kind = GPU_COLLECTION_ALL_EVENTS; +- goto fn_exit; +- } +- kind = GPU_COLLECTION_MIXED; +- +-fn_exit: +- *coll_kind = kind; +- return papi_errno; +-} +- +-const char *cuptic_disabled_reason_g; +- +-void cuptic_disabled_reason_set(const char *msg) +-{ +- cuptic_disabled_reason_g = msg; +-} +- +-void cuptic_disabled_reason_get(const char **pmsg) +-{ +- *pmsg = cuptic_disabled_reason_g; +-} +- +-static int dl_iterate_phdr_cb(struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data) +-{ +- const char *library_name = "libcudart.so"; +- char *library_path = strdup(info->dlpi_name); +- +- if (library_path != NULL && strstr(library_path, library_name) != NULL) { +- linked_cudart_path = strdup(dirname(dirname((char *) library_path))); +- } +- +- free(library_path); +- return PAPI_OK; +-} +- +-static int get_user_cudart_path(void) +-{ +- dl_iterate_phdr(dl_iterate_phdr_cb, NULL); +- if (NULL == linked_cudart_path) { +- return PAPI_EMISC; +- } +- return PAPI_OK; +-} +- +-int cuptic_init(void) +-{ +- int papi_errno = get_user_cudart_path(); +- if (papi_errno == PAPI_OK) { +- LOGDBG("Linked cudart root: %s\n", linked_cudart_path); +- } +- else { +- LOGDBG("Target application not linked with cuda runtime libraries.\n"); +- } +- papi_errno = util_load_cuda_sym(); +- if (papi_errno != PAPI_OK) { +- cuptic_disabled_reason_set("Unable to load CUDA library functions."); +- goto fn_exit; +- } +- +- gpu_collection_e kind; +- papi_errno = util_gpu_collection_kind(&kind); +- if (papi_errno != PAPI_OK) { +- goto fn_exit; +- } +- +- if (kind == GPU_COLLECTION_MIXED) { +- cuptic_disabled_reason_set("No support for systems with mixed compute capabilities, such as CC < 7.0 and CC > 7.0 GPUS."); +- papi_errno = PAPI_ECMP; +- goto fn_exit; +- } +-fn_exit: +- return papi_errno; +-} +- +-int cuptic_is_runtime_perfworks_api(void) +-{ +- static int is_perfworks_api = -1; +- if (is_perfworks_api != -1) { +- goto fn_exit; +- } +- char *papi_cuda_110_cc70_perfworks_api = getenv("PAPI_CUDA_110_CC_70_PERFWORKS_API"); +- +- gpu_collection_e gpus_kind; +- int papi_errno = util_gpu_collection_kind(&gpus_kind); +- if (papi_errno != PAPI_OK) { +- goto fn_exit; +- } +- +- unsigned int cuptiVersion = util_dylib_cupti_version(); +- +- if (gpus_kind == GPU_COLLECTION_ALL_CC70 && +- (cuptiVersion == CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION || util_dylib_cu_runtime_version() == 11000)) +- { +- if (papi_cuda_110_cc70_perfworks_api != NULL) { +- is_perfworks_api = 1; +- goto fn_exit; +- } +- else { +- is_perfworks_api = 0; +- goto fn_exit; +- } +- } +- +- if ((gpus_kind == GPU_COLLECTION_ALL_PERF || gpus_kind == GPU_COLLECTION_ALL_CC70) && cuptiVersion >= CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION) { +- is_perfworks_api = 1; +- goto fn_exit; +- } else { +- is_perfworks_api = 0; +- goto fn_exit; +- } +- +-fn_exit: +- return is_perfworks_api; +-} +- +-int cuptic_is_runtime_events_api(void) +-{ +- static int is_events_api = -1; +- if (is_events_api != -1) { +- goto fn_exit; +- } +- +- gpu_collection_e gpus_kind; +- int papi_errno = util_gpu_collection_kind(&gpus_kind); +- if (papi_errno != PAPI_OK) { +- goto fn_exit; +- } +- +- /* +- * See cupti_config.h: When NVIDIA removes the events API add a check in the following condition +- * to check the `util_dylib_cupti_version()` is also <= CUPTI_EVENTS_API_MAX_SUPPORTED_VERSION. +- */ +- if ((gpus_kind == GPU_COLLECTION_ALL_EVENTS || gpus_kind == GPU_COLLECTION_ALL_CC70)) { +- is_events_api = 1; +- goto fn_exit; +- } else { +- is_events_api = 0; +- goto fn_exit; +- } +-fn_exit: +- return is_events_api; +-} +- +-struct cuptic_info { +- CUcontext ctx; +-}; +- +-int cuptic_ctxarr_create(cuptic_info_t *pinfo) +-{ +- COMPDBG("Entering.\n"); +- int total_gpus; +- int papi_errno = cuptic_device_get_count(&total_gpus); +- if (papi_errno != PAPI_OK) { +- return PAPI_EMISC; +- } +- cuptic_info_t cuCtx = (cuptic_info_t) papi_calloc (total_gpus, sizeof(*pinfo)); +- if (cuCtx == NULL) { +- return PAPI_ENOMEM; +- } +- *pinfo = cuCtx; +- return PAPI_OK; +-} +- +-int cuptic_ctxarr_update_current(cuptic_info_t info) +-{ +- int papi_errno, gpu_id; +- CUcontext tempCtx; +- papi_errno = cudaGetDevicePtr(&gpu_id); +- if (papi_errno != cudaSuccess) { +- return PAPI_EMISC; +- } +- papi_errno = cuCtxGetCurrentPtr(&tempCtx); +- if (papi_errno != CUDA_SUCCESS) { +- return PAPI_EMISC; +- } +- if (info[gpu_id].ctx == NULL) { +- if (tempCtx != NULL) { +- LOGDBG("Registering device = %d with ctx = %p.\n", gpu_id, tempCtx); +- CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC); +- } +- else { +- CUDART_CALL(cudaFreePtr(NULL), return PAPI_EMISC); +- CUDA_CALL(cuCtxGetCurrentPtr(&info[gpu_id].ctx), return PAPI_EMISC); +- LOGDBG("Using primary device context %p for device %d.\n", info[gpu_id].ctx, gpu_id); +- } +- } +- /* If context has changed keep the first seen one but with warning */ +- else if (info[gpu_id].ctx != tempCtx) { +- ERRDBG("Warning: cuda context for gpu %d has changed from %p to %p\n", gpu_id, info[gpu_id].ctx, tempCtx); +- } +- return PAPI_OK; +-} +- +-int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx) +-{ +- *ctx = info[gpu_idx].ctx; +- return PAPI_OK; +-} +- +-int cuptic_ctxarr_destroy(cuptic_info_t *pinfo) +-{ +- papi_free(*pinfo); +- *pinfo = NULL; +- return PAPI_OK; +-} +- +-/* Functions based on bitmasking to detect gpu exclusivity */ +-typedef int64_t gpu_occupancy_t; +-static gpu_occupancy_t global_gpu_bitmask; +- +-static int event_name_get_gpuid(const char *name, int *gpuid) +-{ +- int papi_errno = PAPI_OK; +- char *token; +- char *copy = strdup(name); +- +- token = strtok(copy, "="); +- if (token == NULL) { +- goto fn_fail; +- } +- token = strtok(NULL, "\0"); +- if (token == NULL) { +- goto fn_fail; +- } +- *gpuid = strtol(token, NULL, 10); +- +-fn_exit: +- papi_free(copy); +- return papi_errno; +-fn_fail: +- papi_errno = PAPI_EINVAL; +- goto fn_exit; +-} +- +-static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask) +-{ +- int papi_errno = PAPI_OK, gpu_id; +- long i; +- gpu_occupancy_t acq_mask = 0; +- cuptiu_event_t *evt_rec; +- for (i = 0; i < evt_table->count; i++) { +- papi_errno = cuptiu_event_table_get_item(evt_table, i, (cuptiu_event_t **) &evt_rec); +- if (papi_errno != PAPI_OK) { +- goto fn_exit; +- } +- papi_errno = event_name_get_gpuid(evt_rec->name, &gpu_id); +- if (papi_errno != PAPI_OK) { +- goto fn_exit; +- } +- acq_mask |= (1 << gpu_id); +- } +- *bitmask = acq_mask; +-fn_exit: +- return papi_errno; +-} +- +-int cuptic_device_acquire(cuptiu_event_table_t *evt_table) +-{ +- gpu_occupancy_t bitmask; +- int papi_errno = _devmask_events_get(evt_table, &bitmask); +- if (papi_errno != PAPI_OK) +- return papi_errno; +- if (bitmask & global_gpu_bitmask) { +- return PAPI_ECNFLCT; +- } +- _papi_hwi_lock(_cuda_lock); +- global_gpu_bitmask |= bitmask; +- _papi_hwi_unlock(_cuda_lock); +- return PAPI_OK; +-} +- +-int cuptic_device_release(cuptiu_event_table_t *evt_table) +-{ +- gpu_occupancy_t bitmask; +- int papi_errno = _devmask_events_get(evt_table, &bitmask); +- if (papi_errno != PAPI_OK) { +- return papi_errno; +- } +- if ((bitmask & global_gpu_bitmask) != bitmask) { +- return PAPI_EMISC; +- } +- _papi_hwi_lock(_cuda_lock); +- global_gpu_bitmask ^= bitmask; +- _papi_hwi_unlock(_cuda_lock); +- return PAPI_OK; +-} +diff --git a/src/components/cuda/cupti_common.h b/src/components/cuda/cupti_common.h +deleted file mode 100644 +index 89e5f9623..000000000 +--- a/src/components/cuda/cupti_common.h ++++ /dev/null +@@ -1,110 +0,0 @@ +-/** +- * @file cupti_common.h +- * @author Anustuv Pal +- * anustuv@icl.utk.edu +- */ +- +-#ifndef __CUPTI_COMMON_H__ +-#define __CUPTI_COMMON_H__ +- +-#include <stdio.h> +-#include <cuda.h> +-#include <cupti.h> +- +-#include "cupti_utils.h" +-#include "lcuda_debug.h" +- +-extern const char *linked_cudart_path; +-extern void *dl_cupti; +- +-extern unsigned int _cuda_lock; +- +-/* cuda driver function pointers */ +-extern CUresult ( *cuCtxGetCurrentPtr ) (CUcontext *); +-extern CUresult ( *cuCtxSetCurrentPtr ) (CUcontext); +-extern CUresult ( *cuCtxDestroyPtr ) (CUcontext); +-extern CUresult ( *cuCtxCreatePtr ) (CUcontext *pctx, unsigned int flags, CUdevice dev); +-extern CUresult ( *cuCtxGetDevicePtr ) (CUdevice *); +-extern CUresult ( *cuDeviceGetPtr ) (CUdevice *, int); +-extern CUresult ( *cuDeviceGetCountPtr ) (int *); +-extern CUresult ( *cuDeviceGetNamePtr ) (char *, int, CUdevice); +-extern CUresult ( *cuDevicePrimaryCtxRetainPtr ) (CUcontext *pctx, CUdevice); +-extern CUresult ( *cuDevicePrimaryCtxReleasePtr ) (CUdevice); +-extern CUresult ( *cuInitPtr ) (unsigned int); +-extern CUresult ( *cuGetErrorStringPtr ) (CUresult error, const char** pStr); +-extern CUresult ( *cuCtxPopCurrentPtr ) (CUcontext * pctx); +-extern CUresult ( *cuCtxPushCurrentPtr ) (CUcontext pctx); +-extern CUresult ( *cuCtxSynchronizePtr ) (); +-extern CUresult ( *cuDeviceGetAttributePtr ) (int *, CUdevice_attribute, CUdevice); +- +-/* cuda runtime function pointers */ +-extern cudaError_t ( *cudaGetDeviceCountPtr ) (int *); +-extern cudaError_t ( *cudaGetDevicePtr ) (int *); +-extern cudaError_t ( *cudaSetDevicePtr ) (int); +-extern cudaError_t ( *cudaGetDevicePropertiesPtr ) (struct cudaDeviceProp* prop, int device); +-extern cudaError_t ( *cudaDeviceGetAttributePtr ) (int *value, enum cudaDeviceAttr attr, int device); +-extern cudaError_t ( *cudaFreePtr ) (void *); +-extern cudaError_t ( *cudaDriverGetVersionPtr ) (int *); +-extern cudaError_t ( *cudaRuntimeGetVersionPtr ) (int *); +- +-extern CUptiResult ( *cuptiGetVersionPtr ) (uint32_t* ); +- +-#define DLSYM_AND_CHECK( dllib, name ) dlsym( dllib, name ); \ +- if (dlerror() != NULL) { \ +- ERRDBG("A CUDA required function '%s' was not found in lib '%s'.\n", name, #dllib); \ +- return PAPI_EMISC; \ +- } +- +-#define CUDA_CALL( call, handleerror ) \ +- do { \ +- CUresult _status = (call); \ +- LOGCUDACALL("\t" #call "\n"); \ +- if (_status != CUDA_SUCCESS) { \ +- ERRDBG("CUDA Error %d: Error in call to " #call "\n", _status); \ +- EXIT_OR_NOT; \ +- handleerror; \ +- } \ +- } while (0); +-#define CUDART_CALL( call, handleerror ) \ +- do { \ +- cudaError_t _status = (call); \ +- LOGCUDACALL("\t" #call "\n"); \ +- if (_status != cudaSuccess) { \ +- ERRDBG("CUDART Error %d: Error in call to " #call "\n", _status); \ +- EXIT_OR_NOT; \ +- handleerror; \ +- } \ +- } while (0); +-#define CUPTI_CALL( call, handleerror ) \ +- do { \ +- CUptiResult _status = (call); \ +- LOGCUPTICALL("\t" #call "\n"); \ +- if (_status != CUPTI_SUCCESS) { \ +- ERRDBG("CUPTI Error %d: Error in call to " #call "\n", _status); \ +- EXIT_OR_NOT; \ +- handleerror; \ +- } \ +- } while (0); +- +-void cuptic_disabled_reason_set(const char *msg); +-void cuptic_disabled_reason_get(const char **pmsg); +- +-void *cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[]); +-int cuptic_shutdown(void); +-int cuptic_device_get_count(int *num_gpus); +-int cuptic_init(void); +-int cuptic_is_runtime_perfworks_api(void); +-int cuptic_is_runtime_events_api(void); +- +-typedef struct cuptic_info *cuptic_info_t; +- +-int cuptic_ctxarr_create(cuptic_info_t *pinfo); +-int cuptic_ctxarr_update_current(cuptic_info_t info); +-int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx); +-int cuptic_ctxarr_destroy(cuptic_info_t *pinfo); +- +-/* Functions to track the occupancy of gpu counters in event sets */ +-int cuptic_device_acquire(cuptiu_event_table_t *evt_table); +-int cuptic_device_release(cuptiu_event_table_t *evt_table); +- +-#endif /* __CUPTI_COMMON_H__ */ +diff --git a/src/components/cuda/cupti_dispatch.c b/src/components/cuda/cupti_dispatch.c +index 1091278f0..b0584827f 100644 +--- a/src/components/cuda/cupti_dispatch.c ++++ b/src/components/cuda/cupti_dispatch.c +@@ -5,7 +5,7 @@ + */ + + #include "cupti_config.h" +-#include "cupti_common.h" ++#include "papi_cupti_common.h" + #include "cupti_dispatch.h" + #include "lcuda_debug.h" + +diff --git a/src/components/cuda/cupti_events.c b/src/components/cuda/cupti_events.c +index 3ca99fe98..30f90426c 100644 +--- a/src/components/cuda/cupti_events.c ++++ b/src/components/cuda/cupti_events.c +@@ -6,7 +6,7 @@ + + #include <papi.h> + #include "cupti_events.h" +-#include "cupti_common.h" ++#include "papi_cupti_common.h" + + #pragma GCC diagnostic ignored "-Wunused-parameter" + /* Functions needed by CUPTI Events API */ +diff --git a/src/components/cuda/cupti_profiler.c b/src/components/cuda/cupti_profiler.c +index 6229c10d3..b0e03e232 100644 +--- a/src/components/cuda/cupti_profiler.c ++++ b/src/components/cuda/cupti_profiler.c +@@ -14,7 +14,7 @@ + #include <nvperf_cuda_host.h> + #include <nvperf_target.h> + +-#include "cupti_common.h" ++#include "papi_cupti_common.h" + #include "cupti_profiler.h" + #include "lcuda_debug.h" + +-- +2.47.0 + |