zoukankan      html  css  js  c++  java
  • sharc dsp 学习记录1---2014-07-30

    从今天开始记录学习sharc dsp过程中的点点滴滴吧。

     

    image

    DPI:Digital Peripheral Interface

    DAI:Digital Audio Interface

     

    SHARC SIMD Core

    SIMD 单指令多数据

    ADSP-2148x包含两个用作单指令、多数据(SIMD)引擎的计算处理器元件,分别称为PEX和PEY,各元件均由ALU、乘法器、移位器和寄存器文件组成。

    PEX始终有效,PEY可通过将MODE1寄存器的PEYEN模式位设为1来使能。SIMD模式允许处理器在两个处理元件中执行同一指令,但各处理元件处理不同的数据。这种架构对于执行计算密集型DSP算法非常有效。

     

    SIMD Enabled:

    When any computation or data access is executed, it will be performed automatically in both processing elements

    F0 = F1 + F2;    /* explicit: written in the source code; executes in PEx */

    S0 = S1 + S2;    /* implicit: not written in the source code; executed automatically in PEy in the same instruction cycle */

     

    SIMD Disabled (SISD Mode):

    Only the explicit instruction will be executed in PEx

    image

     

    PM地址总线的宽度 = 24bits。最多可寻址16M字(2^24个地址)的程序/数据。

    PM数据总线的宽度 = 48bits。 用以存取48 位字长的指令,当用来存放数据时,32 位单精度浮点数或32位定点数将位于48 位的高32 位中

     

    48位指令字支持各种并行操作,可实现简练编程。例如,处理器可以有条件地在两个处理元件中执行乘法、加法和减法,同时进行分支并从存储器获取最多4个32位数据值,所有这些只需一个指令。

     

     

    image

     

     

    汇编程序分析(FIR)


    #include    "def21364.h"                    /* Symbol Definition File */

    #define     TAPS    64                        /* filter length; the driver derives the MAC loop count from it as TAPS/2 - 3 */
    #define     N       128                        /* number of input samples processed, and of output values written */

    .extern    ss_fir;                            /* FIR routine, called once per sample from the driver loop */

    /* DM data: buffers accessed with dm(...) through i0 (dline) and i1 (input) */
    .section/dm seg_dmda;                      /* Segments are declared in the .ldf file */
    .ALIGN 2;                                         /* Set alignment to long-word boundary for next variable */
    .var    dline[TAPS+1];                        /* delay line: one extra word beyond TAPS to compensate for the circular buffer, see comments in SS_FIR.asm */
    .ALIGN 2;                                         /* same alignment for the sample array */
    .var    input[N] = "input.dat";            /* array of samples, initialized from input.dat */


    /*
     * PM data.
     * Both arrays are accessed over the PM bus (coeffs via pm(i8,m9) in
     * ss_fir, output via pm(i9,m10) in the driver loop), so the section must
     * carry the /pm qualifier rather than /dm.
     */
    .section/pm seg_pmda;
    .ALIGN 2;                                     /* long-word alignment for the next variable */
    .var    coeffs[TAPS] = "ssfcoeffs.dat";       /* filter coefficients, initialized from ssfcoeffs.dat */
    .ALIGN 2;
    .var    output[N];                            /* output array, one result per input sample */


    /* Interrupt vector entries, placed in program-memory section seg_rth. */
    .section/pm seg_rth;

    /* First entry: return from interrupt, followed by three nops. */
    Reserved_1:
        rti;
        nop;
        nop;
        nop;

    /* Second entry: idle, then jump to the start label, followed by two nops. */
    Chip_Reset:
        idle;
        jump start;
        nop;
        nop;


    /* program memory code */
    .section/pm seg_pmco;                   /* .section (not the legacy .segment) to match every other section in this file */

    /*
     * start: driver for ss_fir.
     *
     * Sets up the DAG registers, zeroes the delay line, then runs ss_fir once
     * per input sample (N times) and stores each result in output[].
     *
     * Registers handed to ss_fir:
     *   f0      current input sample (loaded in the delayed-branch slot of the call)
     *   r3      MAC loop count = TAPS/2 - 3
     *   i0/b0/l0  delay line (DM), m1 = 1, m2 = -1, m3 = 2
     *   i8/b8/l8  coefficients (PM), m9 = 2
     * Register read back from ss_fir:
     *   f8      filter output for the sample
     */
    start:
    /*--------------------- Setup modify registers for arrays --------------------*/
        m1 = 1;                             /* DM: step forward one word */
        m2 = -1;                            /* DM: step back one word */
        m3 = 2;                             /* DM: step forward two words (pairs of samples) */

        m9 = 2;                             /* PM: step forward two words (pairs of coefficients) */
        m10 = 1;                            /* PM: step forward one word (output array) */
    /*----------------------------------------------------------------------------*/

    /*--------------------- Initialization delay line ----------------------------*/
        b0 = dline;                         /* i0 is not loaded separately: the clear loop relies on the b0 write also loading i0 */
        l0 = @dline-1;                      /* dline holds TAPS+1 words, so the circular length is TAPS */

        f8 = 0.0;
        lcntr = TAPS, do clear_fir until lce;
    clear_fir:  dm(i0,m1) = f8;             /* write TAPS zeros into the delay line */

        i0 = dline;                         /* rewind the pointer to the start of the delay line */
    /*----------------------------------------------------------------------------*/

    /*-------------- Setup DAGs for input/output/coeffs and call ss_fir ----------*/
        r3 = TAPS;
        r3 = lshift r3 by -1;               /* r3 = taps/2 due to SIMD mode */
        r0 = 3;                             /* 3 macs outside of fir mac loop */
        r3 = r3 - r0;                       /* r3 = taps/2 - 3 for fir mac loop counter */

        b1 = input;
        l1 = 0;                             /* length 0: input is addressed linearly */

        b9 = output;
        l9 = 0;                             /* length 0: output is addressed linearly */

        b8 = coeffs;
        l8 = @coeffs;                       /* coefficients are addressed as a circular buffer of TAPS words */

        lcntr = N, do fir_loop until lce;
            call ss_fir (db);               /* Call fir; the next two instructions sit in the delayed-branch slots */
            f0 = dm(i1,m1);                 /* Read one sample */
            nop;                            /* Call can't be in last three locations of a loop */
    fir_loop:   pm(i9,m10) = f8;            /* Write result to output */
    /*----------------------------------------------------------------------------*/

    /* Terminate and wait */
    wait1:  idle;
            jump wait1;

     

     

    #include    "def21364.h"                /* Symbol Definition File */

    .global ss_fir;

    /* program memory code */
    .section/pm seg_pmco;

    /*
     * ss_fir: FIR filter for one sample, using SIMD mode and circular buffering.
     *
     * Inputs (as set up by the driver routine in this file):
     *   f0          new input sample
     *   r3          count for the macs loop (driver passes TAPS/2 - 3)
     *   i0          delay-line pointer (DM), used with m1 = 1, m2 = -1, m3 = 2
     *   i8          coefficient pointer (PM), used with m9 = 2
     * Output:
     *   f8          filter result (PEx total plus PEy total)
     * Modified:
     *   MODE1 (CBUFEN and PEYEN are set on entry and both cleared on exit),
     *   f0, f4, f8, f12/r12, s0, i0, i8, lcntr
     *
     * The statement order matters: mode-bit changes take effect one cycle
     * late, the MAC sequence is software pipelined around the macs loop, and
     * the last two instructions execute in the delayed-branch slots of rts.
     */
    ss_fir:
        bit set MODE1 CBUFEN;               /* Circular Buffer Enable, one cycle effect latency */
        nop;                                          /* Circular Buffering not in effect until next cycle */
       
        s0 = dm(i0, m1);                        /* move pointer to delay[1] */
       
        bit set MODE1 PEYEN;                 /* SIMD Mode Enable, one cycle effect latency */
        s0 = dm(i0, m2);                        /* load s0 with the value of delay[1] for SIMD store, move pointer to delay[0] */
       
        dm(i0,m3)=f0, f4 = pm(i8,m9);    /* transfer sample to delayline, done in SIMD to load end of buffer + 1 */
                                                         /* to compensate for circular buffer issue described above, read 2 coeffs */                                       

            /* pipeline prologue: two multiplies are issued before the loop, with no accumulate yet */
            f8=f0*f4, f0=dm(i0,m3), f4=pm(i8,m9);                          /* samples * coeffs, read 2 samples, read 2 coeffs */
            f12=f0*f4, f0=dm(i0,m3), f4=pm(i8,m9);                        /* samples * coeffs, read 2 samples, read 2 coeffs */
            lcntr=r3, do macs until lce;                                             /* FIR loop, r3 iterations of the single instruction at macs */
    macs:f12=f0*f4, f8=f8+f12, f0=dm(i0,m3), f4=pm(i8,m9);       /* samples * coeffs, accum, read 2 samples, read 2 coeffs */
            /* pipeline epilogue: last multiply, then drain the remaining product into the accumulator */
            f12=f0*f4, f8=f8+f12, s0=dm(i0,m2);                            /* samples * coeffs, accum, dummy read to move pointer to oldest sample */
            f8=f8+f12;                                                                   /* final SIMD accum */
            r12=s8;                                                                       /* move PEy total into PEx register file */
           
        rts (db);                                                                           /* delayed return: the next two instructions still execute */
        bit clr MODE1 CBUFEN | PEYEN;                                         /* Circular Buffer Disable, SIMD Mode Disable */
        f8=f8+f12;                                                                      /* last accum: adds the PEy total copied into r12 to the PEx total */

     

    dline的读写

    image

    红色表示进入macs 循环前 i0指针指向的位置。

     

    该程序使用了循环寻址的方式

    Bx、Lx、Ix 三个寄存器的序号x 必须一致,而Mx 寄存器可以在同一个DAG 组中任意选取

    R3 = lshift R3 by -1;  //右移1bit

  • 相关阅读:
    为什么项目经理非常难有节操的选举
    二叉查找树的删除操作
    二叉查找树的前驱后继
    替罪羊树
    树链剖分
    DFS序
    bzoj3224: Tyvj 1728 普通平衡树(平衡树)
    splay树入门(带3个例题)
    红黑树
    AVL树
  • 原文地址:https://www.cnblogs.com/ldjrl2013/p/3910376.html
Copyright © 2011-2022 走看看