Hive UDF (Python)
searchstr_map.py:一个 Python 脚本,从标准输入逐行读取以制表符分隔的记录,并拆分出 cityid、searchstr 等字段(完整代码见下文)。
·
searchstr_map.py
#!/usr/bin/env python
# coding: utf-8
"""Streaming script for a Hive TRANSFORM clause.

Reads tab-separated rows ``cityid<TAB>searchstr<TAB>pv`` from stdin and
writes ``cityid<TAB>searchstr<TAB>sorted_str<TAB>pv`` to stdout, where
``sorted_str`` is built from the tokens found in ``searchstr`` (see
``transform_line``).

The code avoids the ``print`` statement and f-strings so that it runs
unchanged under both Python 2 and Python 3.
"""
import sys
import re

# One lowercase ASCII letter followed by one or more characters from the
# set: hyphen, underscore, dot, digit. Compiled once, outside the row loop.
_TOKEN_RE = re.compile(r'([a-z][\-_.0-9]+)')


def transform_line(line):
    """Convert one input row into its output row.

    Args:
        line: One raw input line, with or without its trailing newline.

    Returns:
        The output row (without a trailing newline), or ``None`` when the
        line has fewer than three tab-separated fields.
    """
    # Strip only the line terminator: a plain strip() would also remove
    # leading/trailing tabs and shift the columns when the first or last
    # field is empty.
    fields = line.rstrip("\r\n").split("\t")
    if len(fields) < 3:
        return None
    cityid, searchstr, pv = fields[0], fields[1], fields[2]
    # Sort the tokens lexicographically and concatenate them without a
    # separator.
    sorted_str = ''.join(sorted(_TOKEN_RE.findall(searchstr)))
    return '%s\t%s\t%s\t%s' % (cityid, searchstr, sorted_str, pv)


def main():
    """Transform every row on stdin and write the results to stdout."""
    for line in sys.stdin:
        row = transform_line(line)
        if row is None:
            # Report the bad row on stderr instead of raising, so a single
            # malformed line does not abort the whole task.
            sys.stderr.write('skipping malformed row: %r\n' % (line,))
            continue
        sys.stdout.write(row + '\n')


if __name__ == '__main__':
    main()
Hive 命令行用法:
-- Register the script as a session resource so the TRANSFORM clause below
-- can invoke it. The terminating semicolon is required, otherwise this
-- command runs into the SELECT that follows.
add file searchstr_map.py;

-- Stream each aggregated (cityid, searchstr, pv) row through the script.
-- The script emits the same columns plus sorted_str.
select
    TRANSFORM (src.cityid, src.searchstr, src.pv)
    USING 'python searchstr_map.py'
    AS (cityid, searchstr, sorted_str, pv)
from (
    -- Count requests per (cityid, searchstr). City id 1 is reported as 0.
    select
        if(cityid = '1', 0, cast(cityid as int)) cityid,
        a.searchstr,
        count(1) pv
    from (
        -- Search requests of one day that carry a non-empty search string.
        -- The cityid request entry is split on commas into an array.
        select
            split(request['cityid'], ',') cityid_arr,
            request['searchstr'] searchstr
        from db.tablename
        where dt_ymd = 20170731
          and request_ser like '%/search/carinfo%'
          and request['searchstr'] is not null
          and request['searchstr'] <> ''
    ) a
    -- Produce one row per element of cityid_arr.
    lateral view explode(a.cityid_arr) t as cityid
    -- Keep only ids made up entirely of digits.
    where cityid rlike '^[0-9]+$'
    group by if(cityid = '1', 0, cast(cityid as int)), a.searchstr
) src;
更多推荐
所有评论(0)