zoukankan      html  css  js  c++  java
  • python中的快速排序

           在工程实际中,经常需要将python代码转化成c++代码,为了获得一样的结果,需要保证算法的一致性。最近在目标检测的算法中,发现python的numpy库默认的排序算法(quicksort)是改进版的快速排序(即introsort),numpy源码中的描述如下:

    * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so
    * the code switches to the O(NlogN) worst case heapsort if not enough progress
    * is made on the large side of the two quicksort partitions. This improves the
    * worst case while still retaining the speed of quicksort for the common case.
    * This is variant known as introsort.
    *
    *
    * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2):
    *   # sort remainder with heapsort if we are not making enough progress
    *   # we arbitrarily choose 2 * log(n) as the cutoff point
    *   if recursion_limit < 0:
    *       heapsort(lower, higher)
    *       return
    *
    *   if lower < higher:
    *      pivot_pos = partition(lower, higher)
    *      # recurse into smaller first and leave larger on stack
    *      # this limits the required stack space
    *      if (pivot_pos - lower > higher - pivot_pos):
    *          quicksort(pivot_pos + 1, higher, recursion_limit - 1)
    *          quicksort(lower, pivot_pos, recursion_limit - 1)
    *      else:
    *          quicksort(lower, pivot_pos, recursion_limit - 1)
    *          quicksort(pivot_pos + 1, higher, recursion_limit - 1)
    *
    *
    * the below code implements this converted to an iteration and as an
    * additional minor optimization skips the recursion depth checking on the
    * smaller partition as it is always less than half of the remaining data and
    * will thus terminate fast enough

    从上面的内容可以看到,改进的算法结合了快速排序和堆排序,如果我们不按照这个算法去实现c++版的排序算法,很难保证结果一致性。

    以下是c++代码,复现了python语言numpy库中的argsort的快速排序算法

    #ifndef __myself__sort__h__h__
    #define __myself__sort__h__h__
    
    /* Number of slots in the explicit pointer stack and depth stack that
     * quick_sort uses in place of recursion. */
    #define PYA_QS_STACK 100
    /* quick_sort stops partitioning a range and finishes it with insertion
     * sort once (last - first) is no larger than this value. */
    #define SMALL_QUICKSORT 15
    /* Not referenced by any code visible in this header; presumably the
     * equivalent cutoff for a merge sort -- TODO confirm before relying on it. */
    #define SMALL_MERGESORT 20
    
    /**
     * Exchange the values stored at two locations.
     *
     * @param a  pointer to the first value
     * @param b  pointer to the second value
     *
     * Both pointers may refer to the same object, in which case the value is
     * left unchanged.
     */
    template<typename T>
    void TYPE_SWAP(T* a, T* b)
    {
        T saved = *b;
        *b = *a;
        *a = saved;
    }
    
    /**
     * Strict "less than" predicate used by every sort routine in this header.
     *
     * @param a  left operand (taken by value)
     * @param b  right operand (taken by value)
     * @return   true when a < b according to T's operator<, false otherwise
     */
    template<typename T>
    bool TYPE_LT(T a, T b)
    {
        if (a < b)
        {
            return true;
        }
        return false;
    }
    
    /**
     * Return the zero-based position of the most significant set bit of n,
     * i.e. floor(log2(n)) for n >= 1.
     *
     * @param n  value to inspect
     * @return   number of right shifts needed to bring n down to 1;
     *           0 for every n <= 1 (including zero and negative values)
     *
     * Declared inline because this is a non-template function defined in a
     * header: without it, including the header from more than one translation
     * unit produces multiple-definition link errors.
     */
    inline int npy_get_msb(int n)
    {
        int k = 0;
        while (n > 1)
        {
            n >>= 1;
            ++k;
        }
        return k;
    }
    
    /**
     * Sift `value` down a binary max-heap stored in start[0 .. count-1].
     *
     * Heap positions are 1-based (the children of position p are 2p and 2p+1);
     * position p is stored at start[p - 1]. Starting from the vacant position
     * `hole`, the larger child is moved up while it compares greater than
     * `value`; `value` is then written into the final vacant position.
     *
     * @param start  first element of the array holding the heap
     * @param hole   1-based position that is currently vacant
     * @param count  number of elements that belong to the heap
     * @param value  element to be placed
     */
    template<typename T>
    void heap_sift_down(T *start, int hole, int count, T value)
    {
        for (int child = hole << 1; child <= count;)
        {
            /* choose the larger of the two children, if a right child exists */
            if (child < count && TYPE_LT(start[child - 1], start[child]))
            {
                child += 1;
            }
            if (!TYPE_LT(value, start[child - 1]))
            {
                break;
            }
            /* promote the child and continue one level further down */
            start[hole - 1] = start[child - 1];
            hole = child;
            child += child;
        }
        start[hole - 1] = value;
    }

    /**
     * Sort start[0 .. n-1] in ascending order (per TYPE_LT) with heapsort.
     *
     * The comparisons and element moves are performed in the same order as a
     * classic 1-based heapsort, but elements are addressed as start[pos - 1]
     * so that no pointer before the beginning of the array is ever formed
     * (computing `start - 1` is undefined behaviour).
     *
     * @param start  first element of the range to sort
     * @param n      number of elements; values <= 1 leave the range untouched
     * @return       always 0
     */
    template<typename T>
    int heap_sort(T *start, int n)
    {
        /* Phase 1: build a max-heap, sifting down every non-leaf position. */
        for (int l = n >> 1; l > 0; --l)
        {
            heap_sift_down(start, l, n, start[l - 1]);
        }
        /* Phase 2: repeatedly move the maximum to the end of the shrinking
         * heap and re-insert the element that used to be there. */
        while (n > 1)
        {
            T tmp = start[n - 1];
            start[n - 1] = start[0];
            n -= 1;
            heap_sift_down(start, 1, n, tmp);
        }
        return 0;
    }
    
    /**
     * Sort start[0 .. num-1] in ascending order (per TYPE_LT) using an
     * iterative quicksort that falls back to heap_sort and finishes small
     * ranges with insertion sort.
     *
     * Instead of recursing, pending ranges are kept on an explicit stack of
     * pointer pairs together with a parallel stack of remaining depth budgets.
     * The initial budget is 2 * npy_get_msb(num); a range popped with a negative
     * budget is handed to heap_sort.
     *
     * @param start  first element of the range to sort
     * @param num    number of elements in the range
     * @return       always 0
     */
    template<typename T>
    int quick_sort(T *start, int num)
    {
        T vp;                           /* pivot value / element being inserted */
        T *pl = start;                  /* first element of the current range */
        T *pr = pl + num - 1;           /* last element of the current range */
        T *stack[PYA_QS_STACK];         /* pending ranges, stored as (first, last) pairs */
        T **sptr = stack;               /* next free slot of the range stack */
        T *pm, *pi, *pj, *pk;
        int depth[PYA_QS_STACK];        /* depth budget saved for each pending range */
        int * psdepth = depth;          /* next free slot of the depth stack */
        int cdepth = npy_get_msb(num) * 2;
    
        for (;;) 
        {
            /* Budget exhausted: sort this range with heap_sort and move on. */
            if (cdepth < 0) 
            {
                heap_sort(pl, pr - pl + 1);
                goto stack_pop;
            }
            while ((pr - pl) > SMALL_QUICKSORT) 
            {
                /* Median of three: order *pl, *pm, *pr so the middle one is the pivot. */
                pm = pl + ((pr - pl) >> 1);
                if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl);
                if (TYPE_LT(*pr, *pm)) TYPE_SWAP(pr, pm);
                if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl);
                vp = *pm;
                pi = pl;
                pj = pr - 1;
                /* Park the pivot at pr - 1; *pl (not greater than the pivot) and
                 * the parked pivot bound the two scans below. */
                TYPE_SWAP(pm, pj);
                for (;;) 
                {
                    do ++pi; while (TYPE_LT(*pi, vp));
                    do --pj; while (TYPE_LT(vp, *pj));
                    if (pi >= pj) 
                    {
                            break;
                    }
                    TYPE_SWAP(pi,pj);
                }
                /* Move the pivot from pr - 1 to the position where the scans met. */
                pk = pr - 1;
                TYPE_SWAP(pi, pk);
                /* Push the larger side on the stack and keep working on the smaller one. */
                if (pi - pl < pr - pi) 
                {
                    *sptr++ = pi + 1;
                    *sptr++ = pr;
                    pr = pi - 1;
                }
                else 
                {
                    *sptr++ = pl;
                    *sptr++ = pi - 1;
                    pl = pi + 1;
                }
                /* One level of budget is spent per partition; the pushed range
                 * resumes with the decremented budget when it is popped. */
                *psdepth++ = --cdepth;
            }
    
            /* Insertion sort for the remaining short range [pl, pr]. */
            for (pi = pl + 1; pi <= pr; ++pi) 
            {
                vp = *pi;
                pj = pi;
                pk = pi - 1;
                /* Shift larger elements one slot to the right to open a gap for vp. */
                while (pj > pl && TYPE_LT(vp, *pk)) 
                {
                    *pj-- = *pk--;
                }
                *pj = vp;
            }
    stack_pop:
            /* Nothing pending: the whole input is sorted. */
            if (sptr == stack) 
            {
                break;
            }
            /* Pop in reverse push order: last, then first, then the saved budget. */
            pr = *(--sptr);
            pl = *(--sptr);
            cdepth = *(--psdepth);
        }
        return 0;
    }
    #endif

    使用方法:

    #include <iostream>
    #include <fstream>
    #include <cstdlib>
    #include <vector>
    #include <map>
    #include "mysort.h"
    using namespace std;
    
    /**
     * A score together with its position in the input, so that sorting an
     * array of ELEM by value yields the argsort permutation in `index`.
     */
    typedef struct tagELEM
    {
        double value;   /* sort key */
        int index;      /* position of this element in the original input */

        /* Order elements by value only. Const-qualified so that const objects
         * and const references can be compared as well. */
        bool operator < (const tagELEM& e) const
        {
            return value<e.value;
        }
    }ELEM, *PELEM;
    
    int main(int argc, char **argv) 
    {
        fstream fdata("data.txt", std::ios::in);
        if(!fdata.is_open())
            return -1;
        fstream fresult("result.txt", std::ios::in);
            if(!fresult.is_open())
            return -1;
        char data_buffer[255] ={0};
        char result_buffer[255] ={0};
        multimap<double, int> mapScore;
        vector<int> vecIdx;
        int idx = 0;
        
        ELEM elems[15280];
        while(!fresult.eof())
        {
            fdata.getline(data_buffer, 255);
            fresult.getline(result_buffer, 255);
            if(data_buffer[0]==0)
                continue;
            elems[idx].index = idx;
            elems[idx].value = atof(data_buffer);
            idx++;
             vecIdx.push_back(atoi(result_buffer));
        }
        
        quick_sort(elems, 15280);// 0, 15279);//, 15280);
        for(int i=0; i<15280; i++)
        {
            if(vecIdx[i] != elems[i].index)
            {
                std::cout<<i<<"	"<<elems[i].index<<"	"<<vecIdx[i]<<"	---------------------"<<elems[i].value<<std::endl;
            }
            else
            {
                std::cout<<i<<"	"<<elems[i].index<<"	"<<vecIdx[i]<<"	"<<elems[i].value<<std::endl;
            }
        }
        fdata.close();
        fresult.close();
        return 0;
    }

    data.txt为保存score的文件,result.txt保存了python排序结果

    参考:

    堆排序:http://www.cnblogs.com/mengdd/archive/2012/11/30/2796845.html

    快速排序:http://blog.csdn.net/morewindows/article/details/6684558

    python排序代码:https://github.com/numpy/numpy/tree/master/numpy/core/src/npysort

  • 相关阅读:
    KMP算法与字符串匹配问题
    贪婪算法(贪心算法)
    普里姆算法(Prim)与最小生成树问题
    克鲁斯卡尔算法(Kruskal算法)与最小生成树问题
    Dijkstra算法与最短路径问题
    SpringCloud(十一)使用actuator和dashborad、turbine对微服务进行监控
    博客美化——页面白天黑夜切换
    Spring5学习笔记——day01
    Mybatis学习笔记——day06
    Mybatis学习笔记——day05
  • 原文地址:https://www.cnblogs.com/linyuanzhou/p/6164842.html
Copyright © 2011-2022 走看看