urllib2乱码解决
#!/usr/bin/env python# -*- coding: utf-8 -*-"""Created on 2017-11-22@author: Negen"""import urllib2import urllibimport cookielibimport sysimport jsonreload(sys)sys.setdefaultencoding("utf
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on 2017-11-22
@author: Negen
"""
import urllib2
import urllib
import cookielib
import sys
import json
# Python 2 only: reload(sys) brings back setdefaultencoding(), which is then
# used to make UTF-8 the default for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding("utf-8")

# Page requested first, before the API call; any cookies it sets land in the jar.
URL = 'http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1'
# Endpoint that receives the JSON POST built below.
POST_URL_1 = "http://meishi.meituan.com/i/api/channel/deal/list"

# One opener shares a single cookie jar across both requests.
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
opener.open(URL)
print(cookie)

# Request headers the browser sent for this POST (kept for reference):
#   POST /i/api/channel/deal/list HTTP/1.1
#   Host: meishi.meituan.com
#   User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:57.0) Gecko/20100101 Firefox/57.0
#   Accept: application/json
#   Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
#   Accept-Encoding: gzip, deflate
#   Referer: http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1
#   Content-Type: application/json
#   x-requested-with: XMLHttpRequest
#   Content-Length: 181
#   Cookie: <browser session cookies, omitted; this script sends whatever the jar holds>
#   Connection: keep-alive
headers = {
    'Host': 'meishi.meituan.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:57.0) Gecko/20100101 Firefox/57.0',
    'Accept': 'application/json',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Accept-Encoding': 'gzip, deflate',
    'Content-Type': 'application/json',
    'x-requested-with': 'XMLHttpRequest',
    # Content-Length is deliberately not set here.
    'Connection': 'keep-alive'
}

# Fields of the JSON request body.
POST_DATA_1 = {
    "offset": 0,
    "limit": 15,
    "cateId": 1,
    "lineId": 0,
    "stationId": 0,
    "areaId": 0,
    "sort": "default",
    "deal_attr_23": "",
    "deal_attr_24": "",
    "deal_attr_25": "",
    "poi_attr_20043": "",
    "poi_attr_20033": ""
}

# Passing a body to Request makes urllib2 issue a POST.
payload = json.dumps(POST_DATA_1)
request = urllib2.Request(POST_URL_1, data=payload, headers=headers)
res = opener.open(request).read()
# The body is printed exactly as received; nothing here decompresses it.
print(res)
运行结果如下:
'RR �&S k���
��g�3�Qsnd7!�3�������Z�4��p �����t$��J�Ү�����Vz{,��皞~�
Tۜ}����٧�����Ϥ$���ک1��}��Ĕ/bz*S<�:1qߐf���� ���B��+�� *�h"��Y� 1t��p
��� �9��:zt�ʭz�f݅���+�ɟ��6la�m8Ԧ�6��c��ԣ*e�#˲�Fc��*�mL�Xz\��lG�f� �y(�x&�}�a�M�eo��҅n]/t�4*�?d��Lc�݉�.�s���Ee ���\"�?���O�
�5���]B!
a
���$�658V��Z�ዣ��1#Q�&����QY43��H�a�
A��,`5 ���r0��sJ��bUEa�d�&
�X��J��J����c��6�8�4��M;�eO��Qc�����:xi�嶁K7!� ��@#S�҉TnB����e.�����[��7;�����S��ٗ��E����ro�20tj��e݇��s�W'�s �Z#j�q�w�{�;y��v������X� -��&��V��5gC+����o�Qwz�i뷊"�̏�g��[�ko�m��L��1��B���t۹�����tw����ZA��?ѐ��?�H�1*
#�'B�8�< mxk�
问题原因:
响应体很可能是被压缩过的:请求头里声明了 Accept-Encoding: gzip, deflate,所以先检查响应头里是否有 Content-Encoding: gzip(或 deflate)。
如果确实被压缩了,就需要自己手动解压——urllib2 不会自动帮你解压。
解决后的代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on 2017-11-22
@author: Negen
"""
import urllib2
import urllib
import cookielib
import sys
import json
reload(sys)
sys.setdefaultencoding("utf-8")
# req = urllib2.Request("http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1")
# res = urllib2.urlopen(req)
# print res.read()
URL = 'http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1'
POST_URL_1 = "http://meishi.meituan.com/i/api/channel/deal/list"
cookie = cookielib.CookieJar()
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
opener.open(URL)
print cookie
# 请求头
# POST /i/api/channel/deal/list HTTP/1.1
# Host: meishi.meituan.com
# User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:57.0) Gecko/20100101 Firefox/57.0
# Accept: application/json
# Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
# Accept-Encoding: gzip, deflate
# Referer: http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1
# Content-Type: application/json
# x-requested-with: XMLHttpRequest
# Content-Length: 181
# Cookie: uuid=fde5207399f0bc0e4f02.1511322657.0.0.0; oc=HWcmne2V5gocN72iSp2W6nf4oSxvTOeQXS3BSHV5K0towR4FLx6Wy0Q7dkgNJpL_5bntemB2VelYvz6AVk4ChB4NULJUumoqZWHw5px35cR5MM7R3VBPkyNs0Lp959JYr0o7MM9ZwaqFxWG62qxQjfl2kK90EIkRAoSgoXk-hhc; ci=30; rvct=30; _lx_utm=utm_campaign%3Dbaidu%26utm_medium%3Dorganic%26utm_source%3Dbaidu%26utm_content%3Dhomepage%26utm_term%3D; _lxsdk_cuid=15fe1d9a765c8-01461cc3612c3b-49566c-100200-15fe1d9a766c8; IJSESSIONID=1vl0lc5i8s2osi8a33wov1kao; iuuid=38393C6E8D92360544E3C5AFB235B93F517247B9AD620FBB69E068DE60A2795B; cityname="%E6%B7%B1%E5%9C%B3"; i_extend=C_b1Gimthomepagecategory11H__a; latlng=22.543099,114.057868,1511328768910; __utma=74597006.1335519448.1511322706.1511322706.1511328729.2; __utmc=74597006; __utmz=74597006.1511328729.2.2.utmcsr=meishi.meituan.com|utmccn=(referral)|utmcmd=referral|utmcct=/i/poi/5896925; ci3=1; client-id=5f48120f-8a0b-4404-b04b-d3cacfd253b8; _lx_utm=utm_campaign%3Dbaidu%26utm_medium%3Dorganic%26utm_source%3Dbaidu%26utm_content%3Dhomepage; _hc.v=25634c66-97e5-a045-4689-729da9d4404b.1511323102; __mta=153950471.1511323102571.1511329392775.1511329693630.9; c_6NCia=1; _lxsdk_s=15fe1d9a766-32b-23b-368%7C%7C23; __utmb=74597006.3.9.1511328795229
# Connection: keep-alive
headers = {
'Host':'meishi.meituan.com',
'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:57.0) Gecko/20100101 Firefox/57.0',
'Accept':'application/json',
'Accept-Language':'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
'Accept-Encoding':'gzip, deflate',
'Content-Type':'application/json',
'x-requested-with':'XMLHttpRequest',
# 'Content-Length':'181',
'Connection':'keep-alive'
}
POST_DATA_1 = {
"offset": 0,
"limit": 15,
"cateId": 1,
"lineId": 0,
"stationId": 0,
"areaId": 0,
"sort": "default",
"deal_attr_23": "",
"deal_attr_24": "",
"deal_attr_25": "",
"poi_attr_20043": "",
"poi_attr_20033": ""
}
data = json.dumps(POST_DATA_1)
request = urllib2.Request(POST_URL_1, data,headers=headers)
response = opener.open(request)
def unzip(data):
    """Return the decompressed contents of a gzip-compressed byte string.

    data -- the raw gzip stream as bytes, for example an HTTP response body
            that was sent with ``Content-Encoding: gzip``.

    Raises IOError (or a subclass of it) when data is not a valid gzip stream.
    """
    import gzip
    import io
    # GzipFile reads from a file object, so wrap the in-memory bytes in one.
    # io.BytesIO is the binary buffer available on both Python 2.6+ and 3.
    gz = gzip.GzipFile(fileobj=io.BytesIO(data))
    try:
        return gz.read()
    finally:
        # Close the reader even when read() fails on corrupt input.
        gz.close()
res = response.read()
print unzip(res)
更多推荐
所有评论(0)