pytorch使用记录（六）明确使用哪块GPU

81 阅读 0 评论 54 点赞

我是靠谱客的博主独特心锁，这篇文章主要介绍pytorch使用记录（六）明确使用哪块GPU，现在分享给大家，希望可以做个参考。

在多GPU的服务器上，可以用下面的函数明确指定使用哪块GPU，并将相关信息打印出来；函数返回的是可用device的列表。

具体用到的pytorch函数是torch.cuda.device_count()，返回可得到的GPU数量。

接下来是具体函数代码，分为两部分：主函数get_proper_device用于明确使用GPU还是CPU；另一个函数get_proper_cuda_device用于明确具体使用哪块GPU。运行前需要先导入copy、re和torch模块。

复制代码

# Resolve the requested CUDA devices to the GPU indices that actually exist.
def get_proper_cuda_device(device, verbose=True):
    """Return the valid GPU indices among the requested CUDA devices.

    Args:
        device: A single device or a list of devices. Each entry is either
            an int GPU index or a string of the form ``"cuda:<index>"``.
        verbose: When true, print the number of GPUs found and every
            requested device that is skipped.

    Returns:
        A new list of int GPU indices, in request order, keeping only those
        with ``0 <= index < torch.cuda.device_count()``. The list passed in
        is not modified.

    Raises:
        ValueError: If an entry is neither an int nor a ``"cuda:<index>"``
            string.
    """
    requested = device if isinstance(device, list) else [device]
    count = torch.cuda.device_count()
    if verbose:
        print("[Builder]: Found {} gpu".format(count))
    valid = []
    for d in requested:
        d_id = None
        if isinstance(d, str):
            # The whole string must be "cuda:<digits>". The index is taken
            # from the capture group rather than a fixed slice, so strings
            # with extra leading or trailing text are rejected, not mis-parsed.
            match = re.fullmatch(r"cuda:(\d+)", d)
            if match:
                d_id = int(match.group(1))
        elif isinstance(d, int):
            d_id = d
        if d_id is None:
            raise ValueError("[Builder]: Wrong cuda id {}".format(d))
        if 0 <= d_id < count:
            valid.append(d_id)
        elif verbose:
            print("[Builder]: {} is not found, ignore.".format(d))
    return valid

# Decide whether to run on GPU(s) or on the CPU.
def get_proper_device(devices, verbose=True):
    """Validate the requested devices and fall back to CPU if no GPU is usable.

    Args:
        devices: A single device or a list of devices. Entries containing
            ``"cpu"`` request the CPU; int entries and strings containing
            ``"cuda"`` request a GPU.
        verbose: Forwarded to ``get_proper_cuda_device``; when true, also
            print a message if the request falls back to the CPU.

    Returns:
        A new list. For a GPU request, the result of
        ``get_proper_cuda_device``, or ``["cpu"]`` when that result is
        empty. Otherwise the requested devices, unchanged.

    Raises:
        AssertionError: If the request mixes CPU and GPU devices.
        ValueError: Propagated from ``get_proper_cuda_device`` for a
            malformed CUDA device.
    """
    # Work on a fresh list so the caller's argument is never modified.
    devices = list(devices) if isinstance(devices, list) else [devices]
    # Check the type first: int entries have no string methods.
    use_cpu = any(isinstance(d, str) and "cpu" in d for d in devices)
    use_gpu = any(
        isinstance(d, int) or (isinstance(d, str) and "cuda" in d)
        for d in devices
    )
    if use_cpu and use_gpu:
        # Raised explicitly, not via `assert`, so the check survives
        # `python -O` while keeping the exception type callers may catch.
        raise AssertionError(
            "{} contains cpu and cuda device.".format(devices)
        )
    if use_gpu:
        # Keep a copy of the request for the fallback message.
        origin = list(devices)
        devices = get_proper_cuda_device(devices, verbose)
        if not devices:
            if verbose:
                print("[Builder]: Failed to find any valid gpu in {}, use `cpu`.".format(origin))
            devices = ["cpu"]
    return devices

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Resolve the requested CUDA devices to the GPU indices that actually exist.
def get_proper_cuda_device(device, verbose=True):
    """Return the valid GPU indices among the requested CUDA devices.

    Args:
        device: A single device or a list of devices. Each entry is either
            an int GPU index or a string of the form ``"cuda:<index>"``.
        verbose: When true, print the number of GPUs found and every
            requested device that is skipped.

    Returns:
        A new list of int GPU indices, in request order, keeping only those
        with ``0 <= index < torch.cuda.device_count()``. The list passed in
        is not modified.

    Raises:
        ValueError: If an entry is neither an int nor a ``"cuda:<index>"``
            string.
    """
    requested = device if isinstance(device, list) else [device]
    count = torch.cuda.device_count()
    if verbose:
        print("[Builder]: Found {} gpu".format(count))
    valid = []
    for d in requested:
        d_id = None
        if isinstance(d, str):
            # The whole string must be "cuda:<digits>". The index is taken
            # from the capture group rather than a fixed slice, so strings
            # with extra leading or trailing text are rejected, not mis-parsed.
            match = re.fullmatch(r"cuda:(\d+)", d)
            if match:
                d_id = int(match.group(1))
        elif isinstance(d, int):
            d_id = d
        if d_id is None:
            raise ValueError("[Builder]: Wrong cuda id {}".format(d))
        if 0 <= d_id < count:
            valid.append(d_id)
        elif verbose:
            print("[Builder]: {} is not found, ignore.".format(d))
    return valid

# Decide whether to run on GPU(s) or on the CPU.
def get_proper_device(devices, verbose=True):
    """Validate the requested devices and fall back to CPU if no GPU is usable.

    Args:
        devices: A single device or a list of devices. Entries containing
            ``"cpu"`` request the CPU; int entries and strings containing
            ``"cuda"`` request a GPU.
        verbose: Forwarded to ``get_proper_cuda_device``; when true, also
            print a message if the request falls back to the CPU.

    Returns:
        A new list. For a GPU request, the result of
        ``get_proper_cuda_device``, or ``["cpu"]`` when that result is
        empty. Otherwise the requested devices, unchanged.

    Raises:
        AssertionError: If the request mixes CPU and GPU devices.
        ValueError: Propagated from ``get_proper_cuda_device`` for a
            malformed CUDA device.
    """
    # Work on a fresh list so the caller's argument is never modified.
    devices = list(devices) if isinstance(devices, list) else [devices]
    # Check the type first: int entries have no string methods.
    use_cpu = any(isinstance(d, str) and "cpu" in d for d in devices)
    use_gpu = any(
        isinstance(d, int) or (isinstance(d, str) and "cuda" in d)
        for d in devices
    )
    if use_cpu and use_gpu:
        # Raised explicitly, not via `assert`, so the check survives
        # `python -O` while keeping the exception type callers may catch.
        raise AssertionError(
            "{} contains cpu and cuda device.".format(devices)
        )
    if use_gpu:
        # Keep a copy of the request for the fallback message.
        origin = list(devices)
        devices = get_proper_cuda_device(devices, verbose)
        if not devices:
            if verbose:
                print("[Builder]: Failed to find any valid gpu in {}, use `cpu`.".format(origin))
            devices = ["cpu"]
    return devices