#!/usr/bin/env python

def hadoopMap(fd):
    """Count how many times each item appears while iterating *fd*.

    Args:
        fd: Any iterable of hashable items. The script below passes open
            text files, so each item is one line of the file. Items are
            used as keys exactly as yielded (nothing is stripped), so a
            line keeps whatever line terminator the file iterator gives it.

    Returns:
        A plain ``dict`` mapping each distinct item to its occurrence count.
        An empty iterable yields an empty dict.
    """
    lines = {}
    for line in fd:
        # dict.get replaces the Python-2-only dict.has_key() and works
        # unchanged on both Python 2 and Python 3.
        lines[line] = lines.get(line, 0) + 1
    return lines

def hadoopReduce(result, mapResult):
    """Merge the counts in *mapResult* into the accumulator *result*.

    Written to be used as the folding function of ``reduce``: counts for
    keys present in both dicts are added, keys only in *mapResult* are
    copied over.

    Args:
        result: Accumulator dict of key -> count. It is updated in place.
        mapResult: Dict of key -> count to fold into *result*; it is only
            read, never modified.

    Returns:
        The same *result* object that was passed in, after merging.

    Side effects:
        Prints the accumulator after each merge (progress trace).
    """
    for key in mapResult:
        # dict.get replaces the Python-2-only dict.has_key().
        result[key] = result.get(key, 0) + mapResult[key]
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(result)
    return result
# Driver: count the lines of test.0 and test.1 separately ("map"), then
# merge the per-file counts into a single dict ("reduce").

# reduce is a builtin on Python 2 but lives only in functools on Python 3;
# importing it from functools works on both.
from functools import reduce

# The with-statement closes both files even if the second open() or the
# mapping step raises.
with open("test.0", 'r') as _first, open("test.1") as _second:
    fd = (_first, _second)
    # list(...) forces the mapping to run while the files are still open;
    # on Python 3 a bare map() is lazy and would read from closed files.
    mapData = list(map(hadoopMap, fd))

print(mapData)
# Start from an empty dict so hadoopReduce never mutates a map result.
reduceResult = reduce(hadoopReduce, mapData, {})
print(reduceResult)

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐