zoukankan      html  css  js  c++  java
  • Learn ZYNQ (7)

    矩阵与向量相乘的例子(5×5 矩阵乘以 5 维向量)

    参考博客:http://blog.csdn.net/kkk584520/article/details/18812321

    MatrixMultiply.c

        typedef int data_type;
        #define N 5

        /*
         * MatrixMultiply - multiply an N x N matrix by an N-element vector.
         *
         *   AA  matrix stored row-major: element (row, col) is AA[row*N + col]
         *   bb  input vector
         *   cc  output vector; cc[row] receives the dot product of matrix
         *       row `row` with bb
         */
        void MatrixMultiply(data_type AA[N*N],data_type bb[N],data_type cc[N])
        {
            int row;
            for (row = 0; row < N; ++row)
            {
                /* Start of the current matrix row inside the flat array. */
                const data_type *row_ptr = &AA[row * N];
                data_type acc = 0;
                int col = 0;
                while (col < N)
                {
                    acc = acc + row_ptr[col] * bb[col];
                    ++col;
                }
                cc[row] = acc;
            }
        }

    修改后:

    #include <ap_cint.h>
    /* Element type. uint15 is provided by <ap_cint.h>; presumably a 15-bit
     * unsigned integer type - confirm against the Vivado HLS documentation. */
    typedef uint15 data_type;
        /* Dimension: the matrix is N x N, the vectors have N elements. */
        #define N 5
    
        /*
         * MatrixMultiply - multiply an N x N matrix by an N-element vector.
         *
         *   AA  matrix stored row-major: element (i, j) is AA[i*N + j]
         *   bb  input vector
         *   cc  output vector; cc[i] = sum over j of AA[i*N + j] * bb[j]
         *
         * The accumulator has type data_type, so each row sum is stored at
         * that type's width.
         */
        void MatrixMultiply(data_type AA[N*N],data_type bb[N],data_type cc[N])
        {
            int i,j;
            /* Outer loop: one output element per matrix row. The label is
             * presumably there so HLS directives can target this loop -
             * confirm against the project's directives file. */
            MatrixMultiply_label2:for(i = 0;i<N;i++)
            {
                data_type sum = 0;
                /* Inner loop: dot product of row i with bb. */
                MatrixMultiply_label1:for(j = 0;j<N;j++)
                {
                    sum += AA[i*N+j]*bb[j];
                }
                cc[i] = sum;
            }
        }

    测试文件:TestMatrixMultiply.c:

    #include <stdio.h>
    #include <ap_cint.h>
    /* Element type used by the test bench; uint15 comes from <ap_cint.h>. */
    typedef uint15 data_type;
    /* Dimension: N x N matrix, N-element vectors. */
    #define N 5
    /* Input matrix; the initializer list is pulled in from a.h. */
    const data_type MatrixA[] = {
        #include "a.h"
    };
    /* Input vector; the initializer list is pulled in from b.h. */
    const data_type Vector_b[] = {
        #include "b.h"
    };
    /* Reference result the computed output is compared against; the
     * initializer list is pulled in from c.h. */
    const data_type MatlabResult_c[] = {
        #include "c.h"
    };
    /* Output buffer filled by MatrixMultiply; starts zeroed. */
    data_type HLS_Result_c[N] = {0};
    /* Prints the per-element difference between the two N-element arrays. */
    void CheckResult(data_type * matlab_result,data_type * your_result);
    
    int main(void)
    {
    	int i;
         printf("Checking Results:
    ");
         MatrixMultiply(MatrixA,Vector_b,HLS_Result_c);
         CheckResult(MatlabResult_c,HLS_Result_c);
         return 0;
    }
    /*
     * CheckResult - print, for each of the N elements, the difference between
     * the reference value and the computed value.
     *
     *   matlab_result  reference values (N elements)
     *   your_result    computed values  (N elements)
     *
     * A line "Idx <i>: Error = 0 " means element i matches the reference.
     */
    void CheckResult(data_type * matlab_result,data_type * your_result)
    {
         int i;
         for(i = 0;i<N;i++)
         {
             /* Convert both operands to int before subtracting so the argument
              * has exactly the type %d expects and a negative difference
              * prints as a negative number. */
             printf("Idx %d: Error = %d \n",i,(int)matlab_result[i]-(int)your_result[i]);
         }
    }

    a.h

    {82},  {10},  {16},  {15},  {66},
    {91},  {28},  {98},  {43},  {4},
    {13},  {55},  {96},  {92},  {85},
    {92},  {96},  {49},  {80},  {94},
    {64},  {97},  {81},  {96},  {68}

    b.h

    {76},
    {75},
    {40},
    {66},
    {18}

    c.h

    {9800},
    {15846},
    {16555},
    {23124},
    {22939}

    ip核顶层:

    	// test_multiply_v1_0
    	//
    	// Top-level wrapper of the packaged IP: exposes one AXI4-Stream slave
    	// (S00_AXIS) and one AXI4-Stream master (M00_AXIS) and connects both to a
    	// single my_stream_ip instance.
    	//
    	// Not used by this wrapper: s00_axis_tstrb, m00_axis_aclk,
    	// m00_axis_aresetn and C_M00_AXIS_START_COUNT. The core is clocked and
    	// reset from the S00_AXIS clock/reset only.
    	//
    	// NOTE(review): my_stream_ip declares fixed 32-bit data ports, so the two
    	// TDATA width parameters presumably have to stay at 32 - confirm before
    	// changing them.
    	module test_multiply_v1_0 #
    	(
    		// Users to add parameters here
    
    		// User parameters ends
    		// Do not modify the parameters beyond this line
    
    
    		// Parameters of Axi Slave Bus Interface S00_AXIS
    		parameter integer C_S00_AXIS_TDATA_WIDTH	= 32,
    
    		// Parameters of Axi Master Bus Interface M00_AXIS
    		parameter integer C_M00_AXIS_TDATA_WIDTH	= 32,
    		parameter integer C_M00_AXIS_START_COUNT	= 32
    	)
    	(
    		// Users to add ports here
    
    		// User ports ends
    		// Do not modify the ports beyond this line
    
    
    		// Ports of Axi Slave Bus Interface S00_AXIS
    		input wire  s00_axis_aclk,
    		input wire  s00_axis_aresetn,
    		output wire  s00_axis_tready,
    		input wire [C_S00_AXIS_TDATA_WIDTH-1 : 0] s00_axis_tdata,
    		input wire [(C_S00_AXIS_TDATA_WIDTH/8)-1 : 0] s00_axis_tstrb,
    		input wire  s00_axis_tlast,
    		input wire  s00_axis_tvalid,
    
    		// Ports of Axi Master Bus Interface M00_AXIS
    		input wire  m00_axis_aclk,
    		input wire  m00_axis_aresetn,
    		output wire  m00_axis_tvalid,
    		output wire [C_M00_AXIS_TDATA_WIDTH-1 : 0] m00_axis_tdata,
    		output wire [(C_M00_AXIS_TDATA_WIDTH/8)-1 : 0] m00_axis_tstrb,
    		output wire  m00_axis_tlast,
    		input wire  m00_axis_tready
    	);
    
    	// The stream core has no TSTRB output, which previously left
    	// m00_axis_tstrb undriven. Every byte of every output word is data, so
    	// drive all strobe bits high.
    	assign m00_axis_tstrb = {(C_M00_AXIS_TDATA_WIDTH/8){1'b1}};
    
    	// Add user logic here
    	// Single core instance serving both the slave and the master stream.
    	my_stream_ip my_stream_ip_v1_0_S01_AXIS_inst (
                    .ACLK(s00_axis_aclk),
                    .ARESETN(s00_axis_aresetn),
                    .S_AXIS_TREADY(s00_axis_tready),
                    .S_AXIS_TDATA(s00_axis_tdata),
                    .S_AXIS_TLAST(s00_axis_tlast),
                    .S_AXIS_TVALID(s00_axis_tvalid),
                    .M_AXIS_TVALID(m00_axis_tvalid),
                    .M_AXIS_TDATA(m00_axis_tdata),
                    .M_AXIS_TLAST(m00_axis_tlast),
                    .M_AXIS_TREADY(m00_axis_tready)
                );
    	// User logic ends
    
    	endmodule

    ip核:(未完成)

    `timescale 1ns / 1ps
    // my_stream_ip
    //
    // AXI4-Stream wrapper around the HLS-generated MatrixMultiply core.
    //
    // Per transaction:
    //   1. Read_Inputs    - accept NUMBER_OF_INPUT_WORDS words from the slave
    //                       stream: first the N*N matrix words (stored in AA in
    //                       arrival order), then the N vector words (bb).
    //   2. Wait_Calculate - start the core and wait for ap_done, latching each
    //                       result word while its valid flag is high.
    //   3. Write_Outputs  - send the N result words on the master stream,
    //                       asserting TLAST on the final word.
    //
    // The input length is fixed, so S_AXIS_TLAST is not examined.
    //
    // NOTE(review): the core's data ports are wired to 32-bit signals here. If
    // the core was generated from the 15-bit data_type variant of the C source,
    // its ports are narrower - confirm the widths against the generated RTL.
    module my_stream_ip
     (
      ACLK,
      ARESETN,
      S_AXIS_TREADY,
      S_AXIS_TDATA,
      S_AXIS_TLAST,
      S_AXIS_TVALID,
      M_AXIS_TVALID,
      M_AXIS_TDATA,
      M_AXIS_TLAST,
      M_AXIS_TREADY
     );

    input                                    ACLK;
    input                                    ARESETN;
    output                                   S_AXIS_TREADY;
    input      [31 :0]                      S_AXIS_TDATA;
    input                                    S_AXIS_TLAST;
    input                                    S_AXIS_TVALID;
    output                                   M_AXIS_TVALID;
    output     [31 :0]                      M_AXIS_TDATA;
    output                                   M_AXIS_TLAST;
    input                                    M_AXIS_TREADY;

       // Problem size; must match N in the C source of the core.
       localparam N = 5;

       // N*N matrix words followed by N vector words (30 in total).
       localparam NUMBER_OF_INPUT_WORDS  = N*N + N;

       // One output word per result element. (This was 30, which sent 30 beats
       // although only N results exist.)
       localparam NUMBER_OF_OUTPUT_WORDS = N;

       // FSM state encoding.
       localparam Idle           = 3'b100;
       localparam Read_Inputs    = 3'b010;
       localparam Write_Outputs  = 3'b001;
       localparam Wait_Calculate = 3'b000;

       // Control signals driven into the core.
       reg start2;
       reg reset2;
       // Status signals coming back from the core.
       wire done2;
       wire idle2;     // connected but not used by the FSM
       wire ready2;

       // Operand storage and raw core outputs.
       reg  [31:0] AA [0:N*N-1];
       reg  [31:0] bb [0:N-1];
       wire [31:0] cc [0:N-1];
       wire        cc_val [0:N-1];
       // Result words latched from cc while the matching cc_val flag is high.
       reg  [31:0] cc_reg [0:N-1];

       reg [4:0] AA_index;      // next AA word to write (0 .. N*N-1)
       reg [2:0] bb_index;      // next bb word to write (0 .. N-1)
       reg [2:0] cc_index;      // result word currently on M_AXIS_TDATA
       reg [2:0] state;

       reg [4:0] nr_of_reads;   // input words still to accept, minus one
       reg [2:0] nr_of_writes;  // output words still to send, minus one

       integer k;

       assign S_AXIS_TREADY = (state == Read_Inputs);
       assign M_AXIS_TVALID = (state == Write_Outputs);

       assign M_AXIS_TDATA  = cc_reg[cc_index];
       // TLAST marks the final beat, i.e. when no further writes remain.
       assign M_AXIS_TLAST  = (state == Write_Outputs) && (nr_of_writes == 0);

       // Result capture.
       // The *_ap_vld port names suggest the HLS ap_vld output protocol, in
       // which a value is only guaranteed while its flag is high - confirm
       // against the generated RTL. Latching on the flag is safe either way.
       always @(posedge ACLK)
       begin
          for (k = 0; k < N; k = k + 1)
            if (cc_val[k])
              cc_reg[k] <= cc[k];
       end

       // Stream / core control FSM.
       always @(posedge ACLK)
       begin
          if(!ARESETN)              // Synchronous reset (active low)
            begin
               state        <= Idle;
               nr_of_reads  <= 0;
               nr_of_writes <= 0;
               AA_index     <= 0;
               bb_index     <= 0;
               cc_index     <= 0;
               reset2       <= 1;   // hold the core in reset until first use
               start2       <= 0;
            end
          else
            case (state)
              Idle:
                if (S_AXIS_TVALID == 1)
                begin
                   state       <= Read_Inputs;
                   nr_of_reads <= NUMBER_OF_INPUT_WORDS - 1;
                   // Rewind the write pointers so every packet fills the
                   // operand arrays from the start.
                   AA_index    <= 0;
                   bb_index    <= 0;
                end

              Read_Inputs:
                if (S_AXIS_TVALID == 1)
                begin
                   // nr_of_reads counts down: while at least N words remain
                   // the current word belongs to the matrix, the final N
                   // words are the vector.
                   if (nr_of_reads >= N)
                   begin
                      AA[AA_index] <= S_AXIS_TDATA;
                      AA_index     <= AA_index + 1;
                   end
                   else
                   begin
                      bb[bb_index] <= S_AXIS_TDATA;
                      bb_index     <= bb_index + 1;
                   end

                   if (nr_of_reads == 0)
                   begin
                      // Last operand is being stored on this edge: release the
                      // core from reset and start it.
                      state  <= Wait_Calculate;
                      reset2 <= 0;
                      start2 <= 1;
                   end
                   else
                      nr_of_reads <= nr_of_reads - 1;
                end

              Wait_Calculate:
                begin
                   // Drop ap_start once the core has accepted it so the core
                   // does not begin a second run.
                   if (ready2 == 1)
                      start2 <= 0;
                   if (done2 == 1)
                   begin
                      start2       <= 0;
                      state        <= Write_Outputs;
                      nr_of_writes <= NUMBER_OF_OUTPUT_WORDS - 1;
                      cc_index     <= 0;
                   end
                end

              Write_Outputs:
                if (M_AXIS_TREADY == 1)
                begin
                   if (nr_of_writes == 0)
                      state <= Idle;
                   else
                   begin
                      nr_of_writes <= nr_of_writes - 1;
                      cc_index     <= cc_index + 1;
                   end
                end

              // Unused encodings: recover to Idle.
              default:
                state <= Idle;
            endcase
       end

       // HLS-generated core. Clocked from ACLK; the previous connection named
       // an undeclared signal, which left the core without a clock.
       MatrixMultiply U1 (
          .ap_clk(ACLK),
          .ap_rst(reset2),
          .ap_start(start2),
          .ap_done(done2),
          .ap_idle(idle2),
          .ap_ready(ready2),
          .AA_0(AA[0]),
          .AA_1(AA[1]),
          .AA_2(AA[2]),
          .AA_3(AA[3]),
          .AA_4(AA[4]),
          .AA_5(AA[5]),
          .AA_6(AA[6]),
          .AA_7(AA[7]),
          .AA_8(AA[8]),
          .AA_9(AA[9]),
          .AA_10(AA[10]),
          .AA_11(AA[11]),
          .AA_12(AA[12]),
          .AA_13(AA[13]),
          .AA_14(AA[14]),
          .AA_15(AA[15]),
          .AA_16(AA[16]),
          .AA_17(AA[17]),
          .AA_18(AA[18]),
          .AA_19(AA[19]),
          .AA_20(AA[20]),
          .AA_21(AA[21]),
          .AA_22(AA[22]),
          .AA_23(AA[23]),
          .AA_24(AA[24]),
          .bb_0(bb[0]),
          .bb_1(bb[1]),
          .bb_2(bb[2]),
          .bb_3(bb[3]),
          .bb_4(bb[4]),
          .cc_0(cc[0]),
          .cc_0_ap_vld(cc_val[0]),
          .cc_1(cc[1]),
          .cc_1_ap_vld(cc_val[1]),
          .cc_2(cc[2]),
          .cc_2_ap_vld(cc_val[2]),
          .cc_3(cc[3]),
          .cc_3_ap_vld(cc_val[3]),
          .cc_4(cc[4]),
          .cc_4_ap_vld(cc_val[4])
          );
    endmodule

    IP核中例化的 MatrixMultiply 模块是由 HLS 从 C 语言生成的 Verilog 代码。

  • 相关阅读:
    C++文件流操作与流缓冲重定向
    转减小编译时间的两种做法
    AFX_MANAGE_STATE(AfxGetStaticModuleState())
    一个游戏程序员的资料一(转)
    ACE的Doublecheckedlocking的Singleton
    Hibernate 过滤器
    悲观锁 HibernateTest.java
    HQL 语句
    HQL 查询语句
    Hibernate 中继承映射之三 每一个类一个表
  • 原文地址:https://www.cnblogs.com/shenerguang/p/3797144.html
Copyright © 2011-2022 走看看