Elem 是 complex 也就是複數
計算 vector<32> A matrix<32,32> B
calc AH dot B dot A
import numpy as np
np.set_printoptions(suppress=True)
import time
def time_me(fn):
def _wrapper(*args, **kwargs):
start = time.clock()
fn(*args, **kwargs)
#print "%s cost {:.4f} second".format(fn.__name__, time.clock() - start)
print (" {} cost {:.9f} s".format(fn.__name__,time.clock() - start))
return _wrapper
m1 = np.array(0.7071+0.7072j)
m1 = np.resize(m1,(1,32))
m2 = np.array(0.7072+0.7073j)
m2 = np.resize(m2,(32,32))
m3 = np.array(0.7073+0.7074j)
m3 = np.resize(m2,(32,1))
@time_me
def test():
np.dot(np.dot(m1,m2),m3)
test()
matlab導出的代碼
//
// File: matrixdot.cpp
//
// MATLAB Coder version : 3.4
// C/C++ source code generated on : 28-Jan-2021 22:17:34
//
// Include Files
#include "rt_nonfinite.h"
#include "matrixdot.h"
#include <stdio.h>
// Function Definitions
//
// Arguments : const creal_T m1_data[]
// const int m1_size[2]
// const creal_T m2_data[]
// const int m2_size[2]
// const creal_T m3_data[]
// const int m3_size[2]
// creal_T res_data[]
// int res_size[2]
// Return Type : void
//
void matrixdot(const creal_T m1_data[], const int m1_size[2], const creal_T
m2_data[], const int m2_size[2], const creal_T m3_data[], const
int m3_size[2], creal_T res_data[], int res_size[2])
{
int k;
int y_size_idx_1;
int ar;
int n;
int i0;
creal_T y_data[32];
int cr;
int br;
int ic;
int ib;
double temp_re;
double temp_im;
int ia;
if ((m1_size[1] == 1) || (m2_size[0] == 1)) {
y_size_idx_1 = m2_size[1];
ar = m2_size[1];
for (i0 = 0; i0 < ar; i0++) {
y_data[i0].re = 0.0;
y_data[i0].im = 0.0;
cr = m1_size[1];
for (br = 0; br < cr; br++) {
y_data[i0].re += m1_data[m1_size[0] * br].re * m2_data[br + m2_size[0] *
i0].re - m1_data[m1_size[0] * br].im * m2_data[br + m2_size[0] * i0].
im;
y_data[i0].im += m1_data[m1_size[0] * br].re * m2_data[br + m2_size[0] *
i0].im + m1_data[m1_size[0] * br].im * m2_data[br + m2_size[0] * i0].
re;
}
}
} else {
k = m1_size[1];
y_size_idx_1 = (signed char)m2_size[1];
n = m2_size[1] - 1;
ar = (signed char)m2_size[1];
for (i0 = 0; i0 < ar; i0++) {
y_data[i0].re = 0.0;
y_data[i0].im = 0.0;
}
if (m2_size[1] != 0) {
for (cr = 1; cr - 1 <= n; cr++) {
for (ic = cr; ic <= cr; ic++) {
y_data[ic - 1].re = 0.0;
y_data[ic - 1].im = 0.0;
}
}
br = 0;
for (cr = 0; cr <= n; cr++) {
ar = -1;
i0 = br + k;
for (ib = br; ib + 1 <= i0; ib++) {
if ((m2_data[ib].re != 0.0) || (m2_data[ib].im != 0.0)) {
temp_re = m2_data[ib].re - 0.0 * m2_data[ib].im;
temp_im = m2_data[ib].im + 0.0 * m2_data[ib].re;
ia = ar;
for (ic = cr; ic + 1 <= cr + 1; ic++) {
ia++;
y_data[ic].re += temp_re * m1_data[ia].re - temp_im * m1_data[ia].
im;
y_data[ic].im += temp_re * m1_data[ia].im + temp_im * m1_data[ia].
re;
}
}
ar++;
}
br += k;
}
}
}
if (y_size_idx_1 == 1) {
res_size[0] = 1;
res_size[1] = m3_size[1];
ar = m3_size[1];
for (i0 = 0; i0 < ar; i0++) {
res_data[i0].re = 0.0;
res_data[i0].im = 0.0;
for (br = 0; br < 32; br++) {
res_data[i0].re += y_data[br].re * m3_data[br + m3_size[0] * i0].re -
y_data[br].im * m3_data[br + m3_size[0] * i0].im;
res_data[i0].im += y_data[br].re * m3_data[br + m3_size[0] * i0].im +
y_data[br].im * m3_data[br + m3_size[0] * i0].re;
}
}
} else {
res_size[1] = (signed char)m3_size[1];
res_size[0] = 1;
ar = (signed char)m3_size[1];
for (i0 = 0; i0 < ar; i0++) {
res_data[i0].re = 0.0;
res_data[i0].im = 0.0;
}
if (m3_size[1] != 0) {
res_data[0].re = 0.0;
res_data[0].im = 0.0;
ar = -1;
for (ib = 0; ib + 1 <= y_size_idx_1; ib++) {
if ((m3_data[ib].re != 0.0) || (m3_data[ib].im != 0.0)) {
temp_re = m3_data[ib].re - 0.0 * m3_data[ib].im;
temp_im = m3_data[ib].im + 0.0 * m3_data[ib].re;
res_data[0].re += temp_re * y_data[ar + 1].re - temp_im * y_data[ar +
1].im;
res_data[0].im += temp_re * y_data[ar + 1].im + temp_im * y_data[ar +
1].re;
}
ar++;
}
}
}
}
//
// Arguments : void
// Return Type : creal_T
//
static creal_T argInit_creal_T(double re,double im)
{
creal_T result;
result.re = re;
result.im = im;
return result;
}
#include <ctime>
static inline int64_t getTimeNs()
{
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return ts.tv_sec*1000000000 + ts.tv_nsec;
}
int main()
{
const int m1_size[2] = {1,32};
const int m2_size[2] = {32,32};
const int m3_size[2] = {32,1};
int res_size[2] = {1,1};
creal_T m1_data[32];
creal_T m2_data[1024];
creal_T m3_data[32];
creal_T res_data[1];
for (int idx1 = 0; idx1 < 32; idx1++) {
m1_data[idx1] = argInit_creal_T(0.7071,0.7072);
}
for (int idx1 = 0; idx1 < 1024; idx1++) {
m2_data[idx1] = argInit_creal_T(0.7072,0.7073);
}
for (int idx1 = 0; idx1 < 32; idx1++) {
m3_data[idx1] = argInit_creal_T(0.7073,0.7074);
}
auto a = getTimeNs();
matrixdot(m1_data, m1_size, m2_data , m2_size, m3_data, m3_size, res_data, res_size);
auto b = getTimeNs();
//cout << "r: " << res_data[0].re << ",i:" << res_data[0].im << endl;
printf("R: %f I: %f cost: %ld ns",res_data[0].re,res_data[0].im,b-a);
return 0;
}
//
// File trailer for matrixdot.cpp
//
// [EOF]
//
cpu支持avx256
R: -724.671038 I: 724.363714 cost: 3261 ns
自己寫使用stl的complex
#include <chrono>
using namespace chrono;
class Tm{
public:
Tm(){
start = chrono::system_clock::now();
}
virtual ~Tm()
{
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
cout << "cost "
//<< double(duration.count()) * microseconds::period::num / microseconds::period::den
<< double(duration.count())
<< " xx " << endl;
}
private:
std::chrono::system_clock::time_point start;
};
template <typename T>
using is_arith = typename std::enable_if<is_arithmetic<T>::value>::type;
template <typename T = float, typename = is_arith<T>>
using Elem = complex<T>;
template <size_t M = 0 /*row*/, size_t N = 0 /*col*/>
using Matrix = array<array<Elem<>, N>, M>;
template<typename T>
void isRightValue(const T& v)
{
cout << "R" << endl;
}
template<typename T>
void isRightValue(T& v)
{
cout << "L" << endl;
}
template <size_t M, size_t N, size_t L> inline
Matrix<M, L> dot(const Matrix<M, N>& m1,const Matrix<N, L>& m2)
{
//cout << typeid(m1).name() << endl;
Matrix<M, L> m;
for (int i = 0; i < M; i++)
{
for (int j = 0; j < L; j++)
{
for (int k = 0; k < N; k++)
{
m[i][j] += m1[i][k] * m2[k][j];
}
}
}
return m;
}
template <size_t M, size_t N, size_t M1, size_t N1> inline
Matrix<M * M1, N * N1>&& kron(const Matrix<M, N>& m1,const Matrix<M1, N1>& m2)
{
Matrix<M * M1, N * N1> m3;
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
for (int k = 0; k < M1; k++)
{
for (int l = 0; l < N1; l++)
{
m3[i * M1 + k][j * N1 + l] = m1[i][j] * m2[k][l];
}
}
}
}
return std::move(m3);
}
template <typename T>
void MatrixDump(const T& m)
{
cout << "M:" << endl;
for (auto row : m)
{
for (auto elem : row)
{
cout << " " << elem;
}
cout << endl;
}
}
Matrix<1, 32> m1 = {{
{{{0.7071, 0.7072}, {0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},
{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},
{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},
{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072},{0.7071, 0.7072}}}
}};
Matrix<32, 32> m2;
Matrix<32,1> m3 = {{
{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},
{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},
{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},
{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}},{{{0.7073, 0.7074}}}
}};
template <typename Args>
constexpr void UNUSED(const Args& param)
{
}
double foo()
{
for(int i=0;i<32;i++)
{
for(int j=0;j<32;j++)
{
m2[i][j] = {0.7072,0.7073};
}
}
Matrix<1,1> m23;
//Matrix<1,1> m234;
{
auto before1 = getTimeNs() ;
// Tm tm;
m23 = dot<1,32,1>(dot<1, 32, 32>(m1, m2),m3);
auto after1 = getTimeNs();
cout << after1 -before1 << "ns:" << m23[0][0] << endl;
}
{
auto be2 = getTimeNs();
for(int i=0;i<256;i++)
m23 = dot<1,32,1>(dot<1, 32, 32>(m1, m2),m3);
auto af2 = getTimeNs();
cout << af2 - be2 << "ns:" << m23[0][0] << endl;
}
// return m23[0][0].real();
printf("%fn",m23[0][0].real());
// return m23[0][0].real;
// cout << time(0) << endl;
#if 0
// MatrixDump(m1);
// MatrixDump(m2);
// MatrixDump(m3);
// MatrixDump(m12);
//MatrixDump(m23);
// MatrixDump(m234);
#endif
return 0;
}
double testreal = 0;
int main()
{
foo();
return 0;
}
耗時:
1783ns:(-724.671,724.363)
384842ns:(-724.671,724.363)
-724.670593
最后
以上就是体贴发带最近收集整理的关于c\c++ matrix profile的全部内容,更多相关c\c++内容请搜索靠谱客的其他文章。
发表评论 取消回复