排序练习
问题一:
现在有一个列表,列表中的数范围都在0到100之间,列表长度大约为100万。设计算法在O(n)时间复杂度内将列表进行排序。
import random

data = [random.randint(0, 100) for _ in range(10000)]


def count_sort(data, max_num=100):
    """Sort ``data`` in place using counting sort.

    Args:
        data: Mutable sequence of integers, each within ``[0, max_num]``.
        max_num: Largest value that may occur in ``data`` (default 100).

    Returns:
        None. ``data`` is overwritten with its values in ascending order.

    Raises:
        ValueError: If ``data`` contains a negative value.
        IndexError: If ``data`` contains a value greater than ``max_num``.
    """
    counts = [0] * (max_num + 1)
    for x in data:
        if x < 0:
            # A negative index would silently wrap around and be counted
            # as a different (large) value, corrupting the result.
            raise ValueError(f"value {x} is outside the range [0, {max_num}]")
        counts[x] += 1

    # All validation happens above, so ``data`` is only rewritten once the
    # whole input is known to be countable.
    pos = 0
    for value, occurrences in enumerate(counts):
        for _ in range(occurrences):
            data[pos] = value
            pos += 1


count_sort(data)
问题二:
现在有n个数(n>10000),设计算法,按大小顺序得到前10大的数。 应用场景:榜单TOP 10
1、插入排序(注:下面的示例代码保留的是最小的 k 个数并按升序返回;若要得到最大的 k 个数,只需把 insert 中的比较方向反过来):
import functools
import random
import time


def call_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function's return value is passed through unchanged.
    """

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def inner(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print('Time cost:', func.__name__, elapsed)
        return result

    return inner


def insert(li, i):
    """Insert ``li[i]`` into the already ascending prefix ``li[:i]``.

    Larger elements of the prefix are shifted one slot to the right until
    the correct position for ``li[i]`` is found. ``li`` is modified in place.
    """
    tmp = li[i]
    j = i - 1
    while j >= 0 and li[j] > tmp:
        li[j + 1] = li[j]
        j -= 1
    li[j + 1] = tmp


def insert_sort(li):
    """Sort ``li`` in place in ascending order using insertion sort."""
    for i in range(1, len(li)):
        insert(li, i)


@call_time
def topk(li, k):
    """Return the ``k`` smallest elements of ``li`` in ascending order.

    A sorted window of ``k + 1`` elements is maintained: every further
    element is written into the last slot and inserted into place, which
    pushes the largest value seen so far into the last slot, where the next
    element overwrites it. Time complexity is O(k * n).

    Args:
        li: Sequence of mutually comparable values; it is not modified.
        k: Number of elements to return; must be non-negative.

    Returns:
        A new list with ``min(k, len(li))`` elements.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")
    top = li[0:k + 1]
    insert_sort(top)
    for i in range(k + 1, len(li)):
        top[k] = li[i]
        insert(top, k)
    # Slice by k rather than dropping the last slot: when len(li) <= k the
    # window holds no spare element and every value belongs in the result.
    return top[:k]


data = list(range(10000))
random.shuffle(data)
print(topk(data, 10))
# Time cost: topk 0.020502567291259766
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
2、堆的方式:
取列表前10个元素建立一个小根堆,堆顶就是目前第10大的数。依次向后遍历原列表:如果元素小于等于堆顶,则忽略该元素;如果大于堆顶,则将堆顶更换为该元素,并且对堆进行一次调整。遍历列表所有元素后,倒序弹出堆顶。(注:下面的示例代码实现的是对称的做法——用大根堆保留最小的 n 个数,并按升序返回。)
import functools
import random
import time


def call_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function's return value is passed through unchanged.
    """

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def inner(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print('Time cost:', func.__name__, elapsed)
        return result

    return inner


def sift(data, low, high):
    """Sift ``data[low]`` down within the max-heap stored in ``data[low..high]``.

    Args:
        data: List holding the heap; modified in place.
        low: Index of the subtree root whose value may violate the heap order.
        high: Index of the last element that belongs to the heap.
    """
    i = low
    j = 2 * i + 1  # left child of i
    tmp = data[i]  # value being sifted down
    while j <= high:  # the child is still inside the heap
        if j + 1 <= high and data[j] < data[j + 1]:
            j += 1  # the right child exists and is the larger one
        if data[j] > tmp:
            data[i] = data[j]  # move the larger child up into the hole
            i = j  # descend into that child
            j = 2 * i + 1
        else:
            break
    data[i] = tmp  # drop the sifted value into its final slot


@call_time
def topn(li, n):
    """Return the ``n`` smallest elements of ``li`` in ascending order.

    A max-heap of at most ``n`` elements is kept; its root is the largest of
    the values retained so far, so any smaller incoming element replaces the
    root. Time complexity is O(len(li) * log n).

    Args:
        li: Sequence of mutually comparable values; it is not modified.
        n: Number of elements to return; must be non-negative.

    Returns:
        A new list with ``min(n, len(li))`` elements.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    # Clamp to the input length so the heap bounds never exceed the list
    # when fewer than n elements are available.
    size = min(n, len(li))
    if size == 0:
        return []

    heap = li[:size]
    # Build a max-heap from the first `size` elements.
    for i in range(size // 2 - 1, -1, -1):
        sift(heap, i, size - 1)

    # Keep only the `size` smallest values in the heap.
    for i in range(size, len(li)):
        if li[i] < heap[0]:
            heap[0] = li[i]
            sift(heap, 0, size - 1)

    # Heap-sort the survivors into ascending order.
    for i in range(size - 1, -1, -1):  # i points at the end of the live heap
        heap[0], heap[i] = heap[i], heap[0]  # move the current maximum to the end
        sift(heap, 0, i - 1)  # restore the heap on the shrunken prefix
    return heap


data = list(range(10000))
random.shuffle(data)
print(topn(data, 10))
# Time cost: topn 0.0015001296997070312
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
问题三:
给定一个列表和一个整数,设计算法找到两个数的下标,使得两个数之和为给定的整数
保证肯定仅有一个结果。 例如,列表[1,2,5,4]与目标整数3,1+2=3,结果为(0, 1)
二分查找的思路(注:本文中问题三与问题四的示例代码位置互换——紧随其后的代码求解的是问题四的下标范围查找,问题三“两数之和”的代码在文末):
from bisect import bisect_left, bisect_right


def bin_search(data_set, val):
    """Return the index range occupied by ``val`` in an ascending list.

    Both ends of the range are located by binary search, so the cost stays
    O(log n) even when ``val`` is repeated many times.

    Args:
        data_set: Sequence sorted in ascending order.
        val: Value to look for.

    Returns:
        A tuple ``(first, last)`` of the inclusive index range holding
        ``val``, or ``None`` when ``val`` does not occur in ``data_set``.
    """
    first = bisect_left(data_set, val)
    if first == len(data_set) or data_set[first] != val:
        return None
    # Start the second search at `first`; everything before it is < val.
    last = bisect_right(data_set, val, first) - 1
    return (first, last)


li = [1, 2, 3, 3, 3, 4, 4, 5]
print(bin_search(li, 5))
# (7, 7)
问题四:
给定一个升序列表和一个整数,返回该整数在列表中的下标范围
例如:列表[1,2,3,3,3,4,4,5],若查找3,则返回(2,4);若查找1,则返回(0,0)
import copy

li = [1, 5, 4, 2]
target = 3
max_num = 100


def func1(nums=None, goal=None):
    """Find two indices whose values sum to ``goal`` by brute force, O(n^2).

    Args:
        nums: List of numbers; defaults to the module-level ``li``.
        goal: Required sum; defaults to the module-level ``target``.

    Returns:
        A tuple ``(i, j)`` with ``i < j``, or ``None`` if no pair matches.
    """
    nums = li if nums is None else nums
    goal = target if goal is None else goal
    for i in range(len(nums)):
        for j in range(i + 1, len(nums)):
            if nums[i] + nums[j] == goal:
                return (i, j)
    return None


def bin_search(data_set, val, low, high):
    """Binary-search ``val`` in the ascending slice ``data_set[low..high]``.

    Returns:
        The index of one occurrence of ``val``, or ``None`` if it is absent.
    """
    while low <= high:
        mid = (low + high) // 2
        if data_set[mid] == val:
            return mid
        elif data_set[mid] < val:
            low = mid + 1
        else:
            high = mid - 1
    return None


def func2(nums=None, goal=None):
    """Find two indices whose values sum to ``goal`` via sort + binary search.

    The original positions are sorted by value so that a hit in the sorted
    order maps straight back to the input indices; looking values up again
    with ``list.index`` would return the same index twice for equal values.
    Time complexity is O(n log n).

    Args:
        nums: List of numbers; defaults to the module-level ``li``.
        goal: Required sum; defaults to the module-level ``target``.

    Returns:
        A tuple ``(i, j)`` with ``i < j``, or ``None`` if no pair matches.
    """
    nums = li if nums is None else nums
    goal = target if goal is None else goal
    order = sorted(range(len(nums)), key=nums.__getitem__)
    values = [nums[idx] for idx in order]
    last = len(values) - 1
    for pos, value in enumerate(values):
        # Search only to the right of pos so an element never pairs with itself.
        match = bin_search(values, goal - value, pos + 1, last)
        if match is not None:
            first, second = order[pos], order[match]
            return (first, second) if first < second else (second, first)
    return None


def func3(nums=None, goal=None):
    """Find two indices whose values sum to ``goal`` with a lookup table.

    ``table[v]`` holds the index at which value ``v`` was last seen. The
    table has ``max_num + 1`` slots, so every value must lie within
    ``[0, max_num]``. Time complexity is O(n + max_num).

    Args:
        nums: List of integers; defaults to the module-level ``li``.
        goal: Required sum; defaults to the module-level ``target``.

    Returns:
        A tuple ``(i, j)`` with ``i < j``, or ``None`` if no pair matches.

    Raises:
        ValueError: If ``nums`` contains a negative value.
        IndexError: If ``nums`` contains a value greater than ``max_num``.
    """
    nums = li if nums is None else nums
    goal = target if goal is None else goal
    table = [None] * (max_num + 1)
    for i, value in enumerate(nums):
        if value < 0:
            # A negative index would wrap around to the end of the table.
            raise ValueError(f"value {value} is outside the range [0, {max_num}]")
        complement = goal - value
        # Look up before recording the current value so that an element is
        # never paired with itself; the bounds check keeps the complement
        # from wrapping around or running past the table.
        if 0 <= complement <= max_num and table[complement] is not None:
            return (table[complement], i)
        table[value] = i
    return None


print(func3())

# Dictionary variant of the same idea: map each still-needed complement to the
# index that is waiting for it, and print every pair as soon as it completes.
data_list = li
data_dict = {}
for i, value in enumerate(data_list):
    if value in data_dict:
        print(data_dict[value], i)
    else:
        data_dict[target - value] = i