#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Occupy a fixed amount of device memory on Ascend NPUs through AscendCL.

The script loads ``libascendcl.so`` with ``ctypes``, allocates
``TOTAL_PER_CARD`` bytes on every device listed in ``TARGET_DEVICES`` in
``CHUNK_SIZE`` pieces, then sleeps until interrupted with Ctrl+C, at which
point every allocation is freed and the runtime is finalized.
"""

import ctypes
import os
import sys
import time


# ---------- Color definitions ----------
class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


def print_info(msg):
    """Print *msg* in cyan."""
    print(f"{Colors.CYAN}{msg}{Colors.ENDC}")


def print_success(msg):
    """Print *msg* in green."""
    print(f"{Colors.GREEN}{msg}{Colors.ENDC}")


def print_warning(msg):
    """Print *msg* in yellow."""
    print(f"{Colors.YELLOW}{msg}{Colors.ENDC}")


def print_error(msg):
    """Print *msg* in red."""
    print(f"{Colors.RED}{msg}{Colors.ENDC}")


def print_header(msg):
    """Print *msg* in bold blue."""
    print(f"{Colors.BOLD}{Colors.BLUE}{msg}{Colors.ENDC}")


# ---------- Configuration ----------
TARGET_DEVICES = list(range(8))  # Occupy devices 0-7
GB = 1024 ** 3
TOTAL_PER_CARD = 28 * GB  # 28GB per card
CHUNK_SIZE = 512 * 1024 * 1024  # 512MB chunk allocation
# ------------------------------------

# Third argument handed to aclrtMalloc.
# NOTE(review): 0 is presumably ACL_MEM_MALLOC_HUGE_FIRST in the
# aclrtMemMallocPolicy enum - confirm against the installed CANN headers.
_MALLOC_POLICY = 0


def _format_gb(num_bytes):
    """Return *num_bytes* expressed in GB, keeping fractional values.

    Integer division would render a 512MB chunk as "0", which made the
    allocation-failure message misleading.
    """
    return f"{num_bytes / GB:g}"


def load_dependencies():
    """Preload driver libraries that libascendcl.so depends on.

    Every library name is looked up in every known driver directory and
    loaded when the file exists. Preloading is best-effort: a library that
    fails to load is reported with a warning and skipped.
    """
    driver_paths = [
        "/usr/local/Ascend/driver/lib64/driver",
        "/usr/local/Ascend/driver/lib64",
        "/usr/local/Ascend/ascend-toolkit/latest/lib64",
    ]
    libs_to_load = ["libmsprofiler.so", "libc_sec.so", "libdrvproxy.so"]

    for path in driver_paths:
        for lib in libs_to_load:
            full_path = os.path.join(path, lib)
            if not os.path.isfile(full_path):
                continue
            try:
                ctypes.CDLL(full_path)
            except OSError as exc:
                # Keep going: the main library may still load without it.
                print_warning(f"Could not preload {full_path}: {exc}")
            else:
                print_info(f"Preloaded dependency: {full_path}")


def find_cann_lib():
    """Locate libascendcl.so from environment variables or default paths.

    Candidates are tried in this order: paths under ``ASCEND_TOOLKIT_PATH``,
    paths under ``ASCEND_HOME``, then the hard-coded install locations.

    Returns:
        The first candidate path that exists as a regular file.

    Raises:
        FileNotFoundError: if none of the candidates exists.
    """
    candidates = []

    toolkit_path = os.environ.get("ASCEND_TOOLKIT_PATH")
    if toolkit_path:
        candidates += [
            f"{toolkit_path}/lib64/libascendcl.so",
            f"{toolkit_path}/aarch64-linux/lib64/libascendcl.so",
        ]

    home = os.environ.get("ASCEND_HOME")
    if home:
        candidates += [
            f"{home}/ascend-toolkit/latest/lib64/libascendcl.so",
            f"{home}/ascend-toolkit/latest/aarch64-linux/lib64/libascendcl.so",
        ]

    candidates += [
        "/usr/local/Ascend/ascend-toolkit/latest/lib64/libascendcl.so",
        "/usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/lib64/libascendcl.so",
        "/usr/local/Ascend/cann-9.0.0/lib64/libascendcl.so",
    ]

    for cand in candidates:
        if os.path.isfile(cand):
            return cand

    raise FileNotFoundError("Cannot find libascendcl.so. Please check CANN installation or set ASCEND_TOOLKIT_PATH environment variable")


def load_library():
    """Load libascendcl.so and declare the ctypes signatures used here.

    Returns:
        The loaded ``ctypes.CDLL`` handle with ``argtypes``/``restype`` set
        for aclInit, aclrtSetDevice, aclrtMalloc, aclrtFree,
        aclrtResetDevice and aclFinalize.

    Raises:
        FileNotFoundError: propagated from ``find_cann_lib``.
        SystemExit: with status 1 when the library file cannot be loaded.
    """
    load_dependencies()
    lib_path = find_cann_lib()
    print_info(f"Using CANN library: {lib_path}")

    try:
        lib = ctypes.CDLL(lib_path)
    except OSError as e:
        print_error(f"Failed to load library {lib_path}: {e}")
        print_warning("Hint: Please run 'source /usr/local/Ascend/ascend-toolkit/set_env.sh' and export LD_LIBRARY_PATH")
        sys.exit(1)

    lib.aclInit.argtypes = [ctypes.c_void_p]
    lib.aclInit.restype = ctypes.c_int
    lib.aclrtSetDevice.argtypes = [ctypes.c_int]
    lib.aclrtSetDevice.restype = ctypes.c_int
    lib.aclrtMalloc.argtypes = [ctypes.POINTER(ctypes.c_void_p), ctypes.c_size_t, ctypes.c_int]
    lib.aclrtMalloc.restype = ctypes.c_int
    lib.aclrtFree.argtypes = [ctypes.c_void_p]
    lib.aclrtFree.restype = ctypes.c_int
    lib.aclrtResetDevice.argtypes = [ctypes.c_int]
    lib.aclrtResetDevice.restype = ctypes.c_int
    lib.aclFinalize.argtypes = []
    lib.aclFinalize.restype = ctypes.c_int
    return lib


def occupy_device(lib, device_id, target_bytes, chunk_size):
    """Allocate up to *target_bytes* on one device, chunk by chunk.

    Args:
        lib: object exposing ``aclrtSetDevice`` and ``aclrtMalloc``
            (normally the handle returned by ``load_library``).
        device_id: index of the device to select before allocating.
        target_bytes: total number of bytes to occupy.
        chunk_size: size in bytes of each individual allocation; the final
            allocation is shortened so the total never exceeds
            *target_bytes*.

    Returns:
        A list with one ``ctypes.c_void_p`` per successful allocation.
        The list is empty when the device could not be selected or when no
        allocation succeeded.

    Raises:
        ValueError: if *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    ret = lib.aclrtSetDevice(device_id)
    if ret != 0:
        print_warning(f"[WARN] Device {device_id} setup failed (error code {ret}), skipping")
        return []

    allocated = []
    total = 0
    while total < target_bytes:
        # Clamp the last request so the total matches target_bytes exactly.
        request = min(chunk_size, target_bytes - total)
        ptr = ctypes.c_void_p()
        ret = lib.aclrtMalloc(ctypes.byref(ptr), request, _MALLOC_POLICY)
        if ret != 0:
            print_warning(f"[WARN] Device {device_id} failed to allocate {_format_gb(request)}GB (error code {ret}), allocated {_format_gb(total)}GB")
            break
        allocated.append(ptr)
        total += request

    if total > 0:
        print_success(f"[OK] Device {device_id} occupied {_format_gb(total)} GB")
    else:
        print_error(f"[FAIL] Device {device_id} failed to allocate any memory")
    return allocated


def main():
    """Occupy memory on every target device and hold it until Ctrl+C.

    Exits with status 1 when ``aclInit`` fails or when no device could be
    occupied. Memory is freed, devices are reset and ``aclFinalize`` is
    called exactly once in the ``finally`` clause on every exit path that
    follows a successful ``aclInit``.
    """
    print_header("=" * 60)
    print_header("NPU Memory Occupation Tool")
    print_header("=" * 60)

    print_info("Initializing ACL ...")
    lib = load_library()
    ret = lib.aclInit(None)
    if ret != 0:
        print_error(f"[FAIL] aclInit failed, error code {ret}")
        sys.exit(1)

    device_pointers = {}
    try:
        for dev_id in TARGET_DEVICES:
            print_info(f"Processing device {dev_id} ...")
            ptrs = occupy_device(lib, dev_id, TOTAL_PER_CARD, CHUNK_SIZE)
            if ptrs:
                device_pointers[dev_id] = ptrs

        if not device_pointers:
            print_error("[FAIL] No devices successfully occupied, exiting")
            # aclFinalize runs in the finally clause; calling it here as
            # well would finalize the runtime twice.
            sys.exit(1)

        failed = [dev_id for dev_id in TARGET_DEVICES if dev_id not in device_pointers]
        if failed:
            print_warning(f"Occupied {len(device_pointers)} of {len(TARGET_DEVICES)} target devices; failed devices: {failed}")
        else:
            print_success("All target devices occupied successfully!")
        print_info("Press Ctrl+C to release all memory and exit")

        while True:
            time.sleep(3600)
    except KeyboardInterrupt:
        print_warning("User terminated, releasing resources...")
    finally:
        for dev_id, ptrs in device_pointers.items():
            lib.aclrtSetDevice(dev_id)
            failures = sum(1 for ptr in ptrs if lib.aclrtFree(ptr) != 0)
            if failures:
                print_warning(f"[WARN] Device {dev_id}: {failures} of {len(ptrs)} allocations failed to free")
            else:
                print_success(f"[OK] Device {dev_id} memory released")
        for dev_id in device_pointers:
            lib.aclrtResetDevice(dev_id)
        lib.aclFinalize()
        print_success("Cleanup complete, NPU fully released")


if __name__ == "__main__":
    main()