在 Hadoop Streaming 作业中,如果通过 import 使用第三方模块,需要在所有宿主机上安装相应模块;而使用 ctypes 库,则可以直接调用随作业一起分发的 C 动态库(.so 文件),示例如下:
- test.c
/*
 * test.c - fixed-capacity integer buffer exported to Python through ctypes.
 *
 * Build it as a shared library before shipping it with the job, e.g.:
 *     gcc -shared -fPIC -o test.so test.c
 */
#include <stdio.h>

/* Maximum number of values add() can store. */
#define BUFFER_CAPACITY 100

int add(int num);
int out(void);

int buffer[BUFFER_CAPACITY];
int count = 0;

/*
 * Append num to the buffer.
 *
 * Returns 0 on success, or -1 when the buffer is already full and the
 * value was dropped, so that callers can detect the loss instead of it
 * being silently reported as success.
 */
int add(int num)
{
    if (count >= BUFFER_CAPACITY) {
        return -1;
    }
    buffer[count++] = num;
    return 0;
}

/*
 * Print every stored value to stdout in insertion order, each followed by
 * a single space. Always returns 0.
 */
int out(void)
{
    int i;

    for (i = 0; i < count; i++) {
        printf("%d ", buffer[i]);
    }
    /* C stdio has its own buffer, separate from the host process's (e.g.
     * Python's sys.stdout); flush so the values are emitted when out()
     * returns rather than at some later point. */
    fflush(stdout);
    return 0;
}
- test_map.py
#!/usr/bin/python
# coding=utf8
"""Hadoop Streaming mapper that collects values through a C library.

For every input line containing MARKER, the second space-separated field is
parsed as an integer and handed to ``add()`` in the shared library. After all
input has been read, the library's ``out()`` prints the collected values.
"""
import sys
from ctypes import CDLL, c_int

# Shared library path, relative to the working directory of the task.
SO_PATH = './test.so'
# Only lines containing this substring are processed.
MARKER = "100013221"


def load_api(path=SO_PATH):
    """Load the shared library at *path* and declare its C signatures.

    Returns the ctypes library handle with ``add(int) -> int`` and
    ``out() -> int`` configured.
    """
    api = CDLL(path)
    api.add.argtypes = [c_int]
    api.add.restype = c_int
    api.out.argtypes = []
    api.out.restype = c_int
    return api


def parse_value(line):
    """Return the integer in the second field of *line*, or None.

    None is returned when the line does not contain MARKER, or when it does
    but its second space-separated field is missing or not an integer. The
    latter case is reported on stderr so a single malformed record does not
    abort the whole map task.
    """
    if MARKER not in line:
        return None
    fields = line.strip().split(' ')
    try:
        return int(fields[1])
    except (IndexError, ValueError):
        sys.stderr.write("test_map.py: skipping malformed line: %r\n" % line)
        return None


def main():
    """Feed matching values from stdin to the library, then print them."""
    api = load_api()
    for line in sys.stdin:
        value = parse_value(line)
        if value is not None:
            api.add(value)
    api.out()


if __name__ == '__main__':
    main()
- test_red.py
#!/usr/bin/python
# coding=utf8
"""Identity reducer for Hadoop Streaming: copy stdin to stdout unchanged."""
import sys

# writelines() walks the stdin iterator and writes each line exactly as read.
sys.stdout.writelines(sys.stdin)
- test.sh
# Run the streaming job with a single reduce task; both the intermediate map
# output and the final output are LZO-compressed (BLOCK compression type).
# test_map.py, test_red.py and test.so are shipped to the tasks with -file.
#
# Expects $streaming, $input and $output to be set by the caller
# ($streaming is presumably the path to the Hadoop Streaming jar - confirm).
# The expansions are quoted so that paths containing spaces or glob
# characters are passed to Hadoop untouched instead of being split or
# expanded by the local shell.
hadoop jar "$streaming" \
    -Dmapred.reduce.tasks=1 \
    -Dmapred.output.compress=true \
    -Dmapred.output.compression.codec=com.hadoop.compression.lzo.LzoCodec \
    -Dmapred.output.compression.type=BLOCK \
    -Dmapred.compress.map.output=true \
    -Dmapred.map.output.compression.codec=com.hadoop.compression.lzo.LzoCodec \
    -input "$input" \
    -output "$output" \
    -mapper "test_map.py" \
    -file test_map.py \
    -reducer "test_red.py" \
    -file test_red.py \
    -file test.so
参考文献:
https://blog.csdn.net/frankie110/article/details/8724509/
http://www.isnowfy.com/introduction-to-python-c-extension/