伪随机数的爆破–2
1 简介
php中的mt_rand采用梅森旋转算法MT19937,但在php 7.1.0之前的版本中,其实现并不是标准算法,导致种子容易被猜解。可以参考《php里的随机数》这篇文章,以及官方文档的changelog。目前已经有很多用来爆破种子的工具,例如著名的php_mt_seed。
这里我参考了php_mt_seed文章中引用的基于CUDA框架的爆破算法,并稍作修改。可以看到,用GPU爆破比用CPU快很多;当然,这取决于你的显卡和CPU。
原始的cuda代码在这里mt_rand.cu
2 具体实现
由于要用clojure的gpu相关的库,要添加依赖到deps.edn:
{:deps {uncomplicate/neanderthal {:mvn/version "0.22.0"}}}
然后是gpu执行的设备代码, mt_rand_me.cu:
typedef unsigned int uint32_t;

#define MT_N (624)
#define N MT_N /* length of state vector */
#define M (397) /* a period parameter */
#define hiBit(u) ((u) & 0x80000000U) /* mask all but highest bit of u */
#define loBit(u) ((u) & 0x00000001U) /* mask all but lowest bit of u */
#define loBits(u) ((u) & 0x7FFFFFFFU) /* mask the highest bit of u */
#define mixBits(u, v) (hiBit(u)|loBits(v)) /* move hi bit of u to hi bit of v */

/* The conditional XOR is keyed on loBit(u). With this form the host-side
   check reproduces the values printed by PHP under MT_RAND_PHP. */
#define twist(m,u,v) (m ^ (mixBits(u,v)>>1) ^ ((uint32_t)(-(uint32_t)(loBit(u))) & 0x9908b0dfU))

#define PHP_MT_RAND_MAX ((long) (0x7FFFFFFF)) /* (1<<31) - 1 */

/* Scale the raw value __n (0 .. __tmax) into the range [__min, __max]. */
#define RAND_RANGE(__n, __min, __max, __tmax) ((__min) + (long) ((double) ( (double) (__max) - (__min) + 1.0) * ((__n) / ((__tmax) + 1.0))))

/* One step of the state-initialisation recurrence: derives state cell i from
   the previous cell. uint32_t arithmetic already wraps modulo 2^32, so no
   explicit mask is needed. */
__device__ inline uint32_t mt_init_next( uint32_t prev, uint32_t i )
{
    return 1812433253U * ( prev ^ (prev >> 30) ) + i;
}

/* Calculate the mt_rand output produced `step` calls after seeding with `seed`
   (step 0 is the first output).

   Memory optimisation: instead of materialising the whole state vector, only
   the three initial-state cells that feed the twist for this position are
   computed (cells step, step + 1 and step + M).

   Precondition: 0 <= step < N - M. Beyond that the twist would need cells
   that have already been regenerated, which this shortcut does not model. */
__device__ uint32_t mt_rand( uint32_t seed, int step )
{
    uint32_t p0, p1, pM;
    uint32_t cs = seed; // value of the current state array cell
    int i;

    //// init step
    for( i = 1; i <= step; i++ )
        cs = mt_init_next( cs, i );
    p0 = cs;                        // cell[step]

    cs = mt_init_next( cs, i );     // here i == step + 1
    p1 = cs;                        // cell[step + 1]

    for( i = step + 2; i <= step + M; i++ )
        cs = mt_init_next( cs, i );
    pM = cs;                        // cell[step + M]

    //// reload (twist) step
    uint32_t x = twist( pM, p0, p1 );

    //// tempering
    x ^= x >> 11;
    x ^= (x << 7) & 0x9d2c5680U;
    x ^= (x << 15) & 0xefc60000U;
    x ^= x >> 18;

    // Drop the lowest bit, leaving a 31-bit result.
    return x >> 1;
}

/* mt_rand(seed, step) scaled into [min, max] with RAND_RANGE. */
__device__ uint32_t mt_rand_mm( uint32_t seed, int step, int min, int max )
{
    return RAND_RANGE( mt_rand( seed, step ), min, max, PHP_MT_RAND_MAX );
}

// The code above is taken from mt_rand.cu (lightly tidied).

/* Brute-force kernel: every thread tests one candidate seed, sseed + gid,
   against the known output sequence.

   sseed  - first seed of the range covered by this launch
   values - known outputs, already scaled to [min, max]
   length - number of entries in values
   min    - lower bound that was passed to mt_rand
   max    - upper bound that was passed to mt_rand
   found  - set to true when any thread finds a matching seed
   sols   - one slot per thread; holds the matching seed, or 0 for no match

   Only the first N - M values can be checked, because mt_rand() is valid only
   for step < N - M; a longer sequence is matched on that prefix.

   A matching seed of 0 is indistinguishable from "no match" in sols, although
   *found is still set. With length == 0 every seed counts as a match.

   NOTE(review): gid is not bounds-checked. The launch must not create more
   threads than sols has entries; confirm against the host grid configuration. */
extern "C" __global__ void
mt_rand_find(uint32_t sseed,
             uint32_t* values,
             uint32_t length,
             int min,
             int max,
             bool *found,
             uint32_t* sols)
{
    uint32_t gid = blockDim.x * blockIdx.x + threadIdx.x;
    uint32_t seed = sseed + gid;

    sols[ gid ] = 0;

    const uint32_t max_steps = N - M;
    const uint32_t checked = length < max_steps ? length : max_steps;

    for( uint32_t i = 0; i < checked; i++ ) {
        if( values[i] != mt_rand_mm( seed, static_cast<int>(i), min, max ) )
            return; // first mismatch rules this seed out
    }

    *found = true;      // indicate that we found a solution
    sols[ gid ] = seed; // here is the solution
}

/* Single-thread helper used to test mt_rand: writes the scaled output for
   (seed, step, min, max) to *ret. */
extern "C" __global__ void
mt_rand_one( uint32_t seed, int step, int min, int max, uint32_t* ret)
{
    *ret = mt_rand_mm( seed, step, min, max );
}
最后是clojure的host代码:
(require '[uncomplicate.clojurecuda.core :refer :all])
(require '[uncomplicate.commons.core :refer :all])

;; Initialise the device
(init)

(device-count)
;; => 1
;; number of graphics cards

;; Select the GPU and create a context; the argument to `device` is the card id
(def my-gpu (device 0))
(def ctx (context my-gpu))

;; Make it the current context
(current-context! ctx)

(def php-rand (slurp "./mt_rand_me.cu"))

;; Compile the source and look up the kernels; the CUDA toolkit must be installed locally.
;; For the details see https://dragan.rocks/articles/18/Interactive-GPU-Programming-1-Hello-CUDA
(def rand-program (compile! (program php-rand)))
(def rand-module (module rand-program))
(def mt-rand-find (function rand-module "mt_rand_find"))
(def mt-rand-one (function rand-module "mt_rand_one"))

(def threads 2000000) ;; number of GPU threads
(def size threads)    ;; length of the result array, one entry per GPU thread
(def bool-size 1)     ;; size in bytes of a C bool
(def uint-size 4)     ;; size in bytes of a C unsigned int
(def max-rand (int (dec (Math/pow 2 31))))

(defn find-rand-one-block
  "Tests the `size` consecutive seeds starting at `n` on the GPU against the
  known output sequence `values`.

  `opts` may contain :min and :max, the bounds that were passed to mt_rand
  (defaults: 0 and `max-rand`).

  Returns a lazy seq of the non-zero matching seeds (as signed ints), or nil
  when the batch contains no match. A matching seed of 0 is filtered out.

  The device buffers are held in `with-release`, so they are freed even when
  the launch or a copy throws."
  [n values & [opts]]
  (let [min (get opts :min 0)
        max (get opts :max max-rand)
        values-len (count values)]
    (with-release [found (mem-alloc bool-size)
                   ;; device copy of the known values
                   values-match (mem-alloc (* uint-size values-len))
                   ;; result array, one slot per thread
                   sols (mem-alloc (* size uint-size))]
      (memcpy-host! (byte-array [0]) found)
      (memcpy-host! (int-array values) values-match)
      ;; NOTE(review): the kernel does not bounds-check its thread id; confirm
      ;; that (grid-1d size) never launches more than `size` threads.
      (launch! mt-rand-find (grid-1d size)
               (parameters n values-match values-len min max found sols))
      (let [ret-found (first (memcpy-host! found (byte-array 1)))
            ret-sols (memcpy-host! sols (int-array size))]
        (when-not (zero? ret-found)
          (println "block:" n "ret found:" ret-found)
          (filter (comp not zero?) ret-sols))))))

;; Number of batches needed to cover all 2^32 seeds, as a (possibly fractional)
;; quotient; callers round it up.
(def max-blocks (/ 0x100000000 size))

(defn find-all-seed
  "Searches the whole 32-bit seed space for seeds that reproduce the sequence
  `vals`, printing every match. `opts` is passed on to `find-rand-one-block`."
  [vals & [opts]]
  (doseq [n (range (int (Math/ceil max-blocks)))]
    (let [rs (find-rand-one-block (* size n) vals opts)]
      (when rs
        (doseq [r rs]
          (println "found:" (Integer/toUnsignedString r)))))))

(time (find-all-seed [617664816]))
;; block: 28000000 ret found: 1
;; found: 28833322
;; block: 368000000 ret found: 1
;; found: 368690622
;; block: 2398000000 ret found: 1
;; found: 2398389484
;; "Elapsed time: 15790.4927 msecs"

(defn rand-one
  "Computes a single mt_rand output on the GPU for `seed`.

  `opts` may contain :step (index of the output, default 0) and :min / :max
  (defaults: 0 and `max-rand`)."
  [seed & [opts]]
  (let [step (get opts :step 0)
        min (get opts :min 0)
        max (get opts :max max-rand)]
    ;; the kernel writes exactly one unsigned int
    (with-release [ret (mem-alloc uint-size)]
      (launch! mt-rand-one (grid-1d 1) (parameters seed step min max ret))
      (first (memcpy-host! ret (int-array 1))))))

(comment
  ;; Check that the results agree with PHP
  (rand-one 1234 {:step 0})
  ;; => 1741177057
  (rand-one 1234 {:step 1})
  ;; => 1068724585

  ;; The results match PHP. *Note*: on php 7.1 and later, pass MT_RAND_PHP to mt_srand
  ;; to get the same results as the mt_rand algorithm used by php5
  ;; php -r 'mt_srand(1234,MT_RAND_PHP); echo mt_rand()."---".mt_rand(). " ";'
  ;; 1741177057---1068724585
  )

;; Clean up
(release rand-module)
(release rand-program)
(release ctx)
php_mt_seed的结果:
$ time php_mt_seed 617664816 Found 3, trying 3690433088 - 3690987519, speed 112249734 seeds per second seed = 28833322 seed = 368690622 seed = 2398389484 Found 3 Found 3, trying 4261412864 - 4294967295, speed 112251741 seeds per second real 0m38.282s user 0m0.000s sys 0m0.000s
下面测试随机字符串序列,如下php代码:
<?php
// Builds a string of $l characters, each picked from $chars by an mt_rand() index.
function randStr($l=4){
    $ret="";
    $chars="qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM";
    // NOTE(review): the upper bound passed to mt_rand is strlen($chars) (62), one past
    // the last valid index (61) if mt_rand's range is inclusive. Left unchanged on
    // purpose: the seed-recovery examples in this article use max = 62 to match it.
    for($i=0;$i<$l;$i++) $ret.=$chars[mt_rand(0,strlen($chars))];
    return $ret;
}
// Seed explicitly with the MT_RAND_PHP mode, then print a 5-character string.
mt_srand( 6688991, MT_RAND_PHP );
echo randStr(5);
?>
randStr结果是:A4MFO
现在根据randStr的结果猜解出seed,先用clojure来实现:
;; To turn the string back into a sequence of random numbers we first need the
;; table that maps random numbers to characters.
;; It is assumed here that this table is already known.
(def chars "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM")

(defn rand-strs->rand-seq
  "Converts a random result string back into the vector of random numbers
  (indices into `chars`) that produced it."
  [strs]
  (into []
        (map (fn [ch] (.indexOf chars (str ch))))
        strs))

;; Output of randStr
(def result "A4MFO")

(def rand-seq (rand-strs->rand-seq result))

rand-seq
;; => [46 29 61 49 44]

(def max-r (count chars))

max-r
;; => 62

(time (find-all-seed rand-seq {:max max-r}))
;; block: 6000000 ret found: 1
;; found: 6688991
;; block: 200000000 ret found: 1
;; found: 201323601
;; block: 3658000000 ret found: 1
;; found: 3658569207
;; block: 3892000000 ret found: 1
;; found: 3893347456
;; "Elapsed time: 20043.783126 msecs"
随着待匹配的序列变长,速度会降低;而且由于max值比较小,会有多个种子都能生成同样的结果序列。
下面测试php_mt_seed的效果:
$ time php_mt_seed 46 46 0 62 29 29 0 62 61 61 0 62 49 49 0 62 44 44 0 62 Pattern: EXACT-FROM-63 EXACT-FROM-63 EXACT-FROM-63 EXACT-FROM-63 EXACT-FROM-63 seed = 6688991 seed = 201323601 seed = 3658569207 seed = 3893347456 Found 4 Found 4, trying 4261412864 - 4294967295, speed 107807449 seeds per second real 0m39.848s user 0m0.000s sys 0m0.000s
可以看到,即使待匹配的序列变长,php_mt_seed的速度也比较稳定。可能它是在一次rand计算中比较整个序列的,所以序列长度对速度没什么影响。而我这里的GPU代码,序列中每个值的比较都要调用一次mt_rand,mt_rand又要根据step从头计算结果,因此mt_rand_find的执行时间会随着序列长度的增加而增加。这里只做个简单的比较,就不再优化算法实现了。
3 总结
可以看到GPU的速度非常快:爆破单个随机数时,本机测试大概需要16秒,而php_mt_seed大概需要38秒。这主要是因为GPU的线程数量很多,示例中开了2000000个线程。我只是无脑地开线程,对CUDA编程并没有深入研究,这次实验只是为了体验一下GPU编程。
Created: 2019-03-17 周日 09:00