Python 简单实现 Hadoop MapReduce 统计功能
#!/usr/bin/env python
def hadoopMap(fd):
    """Count how many times each line occurs in *fd* (the "map" step).

    Args:
        fd: Any iterable of lines, e.g. an open text file.

    Returns:
        A dict mapping each distinct line to its number of occurrences.
        Lines are used verbatim as keys, so a trailing newline is part of
        the key.
    """
    lines = {}
    for line in fd:
        # dict.get replaces the has_key() test, which no longer exists on
        # Python 3, and works unchanged on Python 2.
        lines[line] = lines.get(line, 0) + 1
    return lines
·
#!/usr/bin/env python
def hadoopMap(fd):
    """Count how many times each line occurs in *fd* (the "map" step).

    Args:
        fd: Any iterable of lines, e.g. an open text file.

    Returns:
        A dict mapping each distinct line to its number of occurrences.
        Lines are used verbatim as keys, so a trailing newline is part of
        the key.
    """
    lines = {}
    for line in fd:
        # dict.get replaces the has_key() test, which no longer exists on
        # Python 3, and works unchanged on Python 2.
        lines[line] = lines.get(line, 0) + 1
    return lines
def hadoopReduce(result, mapResult):
    """Merge one map output into the running totals (the "reduce" step).

    Intended to be used as the function argument of ``reduce``: *result* is
    the accumulator and *mapResult* is the next per-file count dict.

    Args:
        result: Dict of accumulated counts; it is updated in place.
        mapResult: Dict of counts to add to *result*.

    Returns:
        The same *result* dict, after adding every count from *mapResult*.
    """
    for key in mapResult:
        # dict.get replaces the has_key() test, which no longer exists on
        # Python 3, and works unchanged on Python 2.
        result[key] = result.get(key, 0) + mapResult[key]
    # Trace of the accumulator after each merge. The parenthesised
    # single-argument form prints the same on Python 2 and Python 3.
    print(result)
    return result
# Driver: count the lines of each input file (map), then merge the
# per-file counts into a single total (reduce).
# reduce() is a builtin only on Python 2; functools.reduce exists on both
# Python 2.6+ and Python 3.
from functools import reduce

mapData = []
for path in ("test.0", "test.1"):
    # Each file is read completely inside its `with` block. This avoids
    # closing the files before a lazy map() (Python 3) has read them, and
    # guarantees the file is closed even if counting raises.
    with open(path, 'r') as fd:
        mapData.append(hadoopMap(fd))
print(mapData)
reduceResult = reduce(hadoopReduce, mapData, {})
print(reduceResult)
更多推荐
已为社区贡献1条内容
所有评论(0)