I am 包容人生, a blogger at 靠谱客. This post mainly introduces DROO's demo_on_off.py, shared here in the hope that it can serve as a reference.

The complete code is in the paper author's repository: https://github.com/revenol/DROO. I'm posting it here to make it easier for myself to read through the code, and will delete it once I'm done.
This is the TensorFlow 1.x version of the code for the paper "Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks".
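
Before running the demo, it may help to check that the training samples under ./data/ look the way the script expects. The snippet below is my own quick sanity check, not part of the repository: the key names 'input_h' and 'output_obj' and the data_10 file name come from the demo itself, while the exact array shapes depend on how the .mat files were generated.

import scipy.io as sio   # the demo reads its training samples from .mat files with scipy.io

data = sio.loadmat('./data/data_10')   # N = 10 active WDs, same path pattern as in the demo
print(data['input_h'].shape)           # channel gains, one row per time frame
print(data['output_obj'].shape)        # corresponding maximum computation rates from solving (P2)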

# #################################################################
#  Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks
#
#  This file contains a demo evaluating the performance of DROO by randomly turning on/off some WDs.
#  It loads the training samples from ./data/data_#.mat, where # denotes the number of active WDs in
#  the MEC network. Note that the maximum computation rate needs to be recomputed by solving (P2)
#  once a WD is turned off/on.
#
#  References:
#  [1] Liang Huang, Suzhi Bi, and Ying-jun Angela Zhang, "Deep Reinforcement Learning for Online
#      Offloading in Wireless Powered Mobile-Edge Computing Networks", submitted to IEEE Journal on
#      Selected Areas in Communications.
#
#  version 1.0 -- April 2019. Written by Liang Huang (lianghuang AT zjut.edu.cn)
# #################################################################

import scipy.io as sio                     # import scipy.io for .mat file I/O
import numpy as np                         # import numpy

from memory import MemoryDNN
from optimization import bisection
from main import plot_rate, save_to_txt

import time


def WD_off(channel, N_active, N):
    # turn off one WD
    if N_active > 5:  # currently we only support turning off up to half of the WDs
        N_active = N_active - 1
        # set the N_active-th channel close to 0
        # since all channels in each time frame are randomly generated, we turn off the WD with the greatest index
        channel[:, N_active] = channel[:, N_active] / 1000000  # a programming trick, so that we can recover its channel gain once the WD is turned on again
        print("    The %dth WD is turned off." % (N_active + 1))

    # update the expected maximum computation rate
    rate = sio.loadmat('./data/data_%d' % N_active)['output_obj']
    return channel, rate, N_active


def WD_on(channel, N_active, N):
    # turn on one WD
    if N_active < N:
        N_active = N_active + 1
        # recover the (N_active-1)-th channel
        channel[:, N_active - 1] = channel[:, N_active - 1] * 1000000
        print("    The %dth WD is turned on." % (N_active))

    # update the expected maximum computation rate
    rate = sio.loadmat('./data/data_%d' % N_active)['output_obj']
    return channel, rate, N_active


if __name__ == "__main__":
    '''
    This demo evaluates DROO for MEC networks where WDs can be occasionally turned off/on.
    After DROO converges, we turn off one WD at each of the time frames 6,000, 6,500, 7,000,
    and 7,500, and then turn them back on at time frames 8,000, 8,500, and 9,000 (two at 9,000).
    At time frame 9,500, we turn off two WDs, resulting in an MEC network with 8 active WDs.
    '''

    N = 10                 # number of users
    N_active = N           # number of active users
    N_off = 0              # number of off-users
    n = 10000              # number of time frames, <= 10,000
    K = N                  # initialize K = N
    decoder_mode = 'OP'    # the quantization mode could be 'OP' (Order-preserving) or 'KNN'
    Memory = 1024          # capacity of memory structure
    Delta = 32             # update interval for adaptive K

    print('#user = %d, #channel=%d, K=%d, decoder = %s, Memory = %d, Delta = %d' % (N, n, K, decoder_mode, Memory, Delta))

    # Load data
    channel = sio.loadmat('./data/data_%d' % N)['input_h']
    rate = sio.loadmat('./data/data_%d' % N)['output_obj']

    # scale h up to be close to 1 for better training; it is a trick widely adopted in deep learning
    channel = channel * 1000000
    channel_bak = channel.copy()

    # generate the train and test data sample index
    # data are split as 80:20
    # training data are randomly sampled with duplication if n > total data size
    split_idx = int(.8 * len(channel))
    num_test = min(len(channel) - split_idx, n - int(.8 * n))  # test data size

    mem = MemoryDNN(net=[N, 120, 80, N],
                    learning_rate=0.01,
                    training_interval=10,
                    batch_size=128,
                    memory_size=Memory
                    )

    start_time = time.time()

    rate_his = []
    rate_his_ratio = []
    mode_his = []
    k_idx_his = []
    K_his = []
    h = channel[0, :]

    for i in range(n):
        # for dynamic number of WDs
        if i == 0.6 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.65 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.7 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.75 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
        if i == 0.8 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_on(channel, N_active, N)
        if i == 0.85 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_on(channel, N_active, N)
        if i == 0.9 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_on(channel, N_active, N)
            channel, rate, N_active = WD_on(channel, N_active, N)
        if i == 0.95 * n:
            print("At time frame %d:" % (i))
            channel, rate, N_active = WD_off(channel, N_active, N)
            channel, rate, N_active = WD_off(channel, N_active, N)

        if i % (n // 10) == 0:
            print("%0.1f" % (i / n))
        if i > 0 and i % Delta == 0:
            # index counts from 0
            if Delta > 1:
                max_k = max(k_idx_his[-Delta:-1]) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        i_idx = i
        h = channel[i_idx, :]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        r_list = []
        for m in m_list:
            # only active users are used to compute the rate
            r_list.append(bisection(h[0:N_active] / 1000000, m[0:N_active])[0])

        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of the largest reward
        k_idx_his.append(np.argmax(r_list))
        # record K in case of adaptive K
        K_his.append(K)
        # save the mode with the largest reward
        mode_his.append(m_list[np.argmax(r_list)])

        # if i < 0.6*n:
        # encode the mode with the largest reward
        mem.encode(h, m_list[np.argmax(r_list)])

    total_time = time.time() - start_time
    mem.plot_cost()
    plot_rate(rate_his_ratio)

    print("Averaged normalized computation rate:", sum(rate_his_ratio[-num_test:-1]) / num_test)
    print('Total time consumed:%s' % total_time)
    print('Average time per channel:%s' % (total_time / n))

    # save data into txt
    save_to_txt(k_idx_his, "k_idx_his.txt")
    save_to_txt(K_his, "K_his.txt")
    save_to_txt(mem.cost_his, "cost_his.txt")
    save_to_txt(rate_his_ratio, "rate_his_ratio.txt")
    save_to_txt(mode_his, "mode_his.txt")
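
One detail worth highlighting is the trick in WD_off()/WD_on(): instead of deleting a column, the channel gain of the switched-off WD is divided by 1,000,000 so the bisection solver effectively ignores it, and multiplying by the same factor later restores the original value. Below is a minimal, self-contained sketch of that idea using my own toy values, not the repository's data.

import numpy as np

channel = np.random.rand(5, 10) * 1000000    # toy gains: 5 time frames, 10 WDs
backup = channel.copy()

wd = 9                                       # switch off the WD with the greatest index, as WD_off() does
channel[:, wd] = channel[:, wd] / 1000000    # "off": the gain becomes negligible for the solver
channel[:, wd] = channel[:, wd] * 1000000    # "on": the gain is restored
print(np.allclose(channel, backup))          # True: the original gains are recovered (up to floating-point rounding)

This is also why the demo only rescales columns and reloads ./data/data_%d.mat for the new number of active WDs, rather than regenerating the channel data.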

Finally

That's all of the content about DROO demo_on_off.py that 包容人生 has recently collected and organized. For more DROO-related content, please search the other articles on 靠谱客.
