网站建设开发的规划流程,wordpress评论代码,山东的互联网公司都有什么,要做一个网站得怎么做 一、网址：
全国行政区划信息查询平台 二、分析并搭建框架
检查网页源码： 检查网页源码可以发现： 所有省级信息全部在 JavaScript 下的 json 中，会在页面加载时加载 json 数据，填充到页面的 option 中。 1、第一步：…一、网址
全国行政区划信息查询平台 二、分析并搭建框架
检查网页源码：检查网页源码可以发现，所有省级信息全部在 JavaScript 下的 json 中，会在页面加载时加载 json 数据，填充到页面的 option 中。 1、第一步：使用正则表达式抓取 json 数据并解析，组成一个 province 集合。
def get_province(self):
    """Return the provinces embedded in the landing page as ``{code: name}``.

    The page carries its province list in an inline ``<script>`` of the form
    ``var json = [...];``.  That array is pulled out with a regular
    expression and parsed as JSON; each record contributes its
    ``quHuaDaiMa`` value as the key and its ``shengji`` value as the name.

    Reads ``self.soup`` (the parsed landing page).

    Raises:
        ValueError: if no ``<script>`` containing ``var json = ...;`` exists.
    """
    # Non-greedy up to the first ';' -- assumes the JSON payload itself
    # contains no semicolon (true for the page this was written against).
    pattern = re.compile(r'var json = (.*?);', re.MULTILINE | re.DOTALL)

    # ``string=`` is the current name of BeautifulSoup's ``text=`` filter.
    script = self.soup.find('script', string=pattern)
    match = pattern.search(script.text) if script is not None else None
    if match is None:
        raise ValueError('province JSON ("var json = ...;") not found in page')

    return {
        record['quHuaDaiMa']: record['shengji']
        for record in json.loads(match.group(1))
    }
2、第二步：检查该网站实现级联查询的方式，找出查询市、区的方式。 根据这段源码可看出：在选择省级后，网页会调用 selectJson 接口进行一个 post 请求，上图可以看到请求的 body 和 header 等信息。
代码
def get_city(self, shengji):
    """Return the prefecture-level entries of one province as ``{code: name}``.

    POSTs ``shengji=<province>`` to the site's ``selectJson`` endpoint and
    maps each returned record's ``quHuaDaiMa`` value to its ``diji`` value.

    Args:
        shengji: province name exactly as returned by ``get_province``.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    # The body is sent as raw UTF-8 (not percent-encoded), e.g.
    # 'shengji=江苏省(苏)'.encode('UTF-8'), as in the original request.
    body = ('shengji=' + shengji).encode('UTF-8')
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    }
    # A timeout keeps one stalled request from hanging the whole crawl.
    response = requests.post('http://xzqh.mca.gov.cn/selectJson',
                             data=body, headers=headers, timeout=30)
    response.raise_for_status()
    return {
        record['quHuaDaiMa']: record['diji']
        for record in json.loads(response.content)
    }


def get_area(self, shengji, diji):
    """Return the county-level entries of one city as ``{code: name}``.

    POSTs ``shengji=<province>&diji=<city>`` to the ``selectJson`` endpoint
    and maps each returned record's ``quHuaDaiMa`` value to its ``xianji``
    value.

    Args:
        shengji: province name exactly as returned by ``get_province``.
        diji: city name exactly as returned by ``get_city``.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    body = ('shengji=' + shengji + '&diji=' + diji).encode('UTF-8')
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    }
    response = requests.post('http://xzqh.mca.gov.cn/selectJson',
                             data=body, headers=headers, timeout=30)
    response.raise_for_status()
    return {
        record['quHuaDaiMa']: record['xianji']
        for record in json.loads(response.content)
    }
3、第三步：main 函数遍历所有省、市、区数据并入库。
数据库表结构如下 三、全部代码
import requests
from bs4 import BeautifulSoup
import pymysql
import re
import json


class allAreaDataNew(object):
    """Scrape China's province / city / district list and load it into MySQL.

    Provinces are read from JSON embedded in the landing page; cities and
    districts are fetched per parent through the site's ``selectJson``
    endpoint.  Every entry becomes one row of the ``area_config`` table,
    which is emptied and refilled in a single transaction.

    Row values are inserted positionally as (null, name, level, code,
    parent code).  NOTE(review): the table's column names are not visible in
    this file -- confirm against the real schema.

    NOTE: the landing page is downloaded while the class body executes, i.e.
    at import time.  That is kept for backward compatibility with code that
    reads ``allAreaDataNew.soup`` / ``allAreaDataNew.wb_data``.
    """

    base_url = 'http://xzqh.mca.gov.cn/map'
    select_url = 'http://xzqh.mca.gov.cn/selectJson'
    request_timeout = 30  # seconds; keeps a stalled request from hanging the crawl
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
    }

    # City-level placeholders some provinces use for counties administered
    # directly by the province / autonomous region.
    direct_county_units = frozenset(['省直辖县级行政单位', '自治区直辖县级行政单位'])

    # Parameterised insert: scraped names never become part of the SQL text.
    insert_sql = 'insert into area_config values (null, %s, %s, %s, %s)'

    wb_data = requests.get(base_url, headers=headers, timeout=request_timeout)
    wb_data.encoding = 'GBK'
    soup = BeautifulSoup(wb_data.text, 'lxml')

    def __init__(self):
        """Connect to MySQL, run the full crawl-and-load, then disconnect."""
        # Replace the '***' placeholders with your own database settings.
        # Keyword arguments are required by PyMySQL 1.0 and later.
        self.db = pymysql.connect(host='***', user='***', password='***',
                                  database='***', charset='utf8mb4')
        try:
            self.main()
        finally:
            # Close the connection even when the crawl raises.
            self.db.close()

    def main(self):
        """Walk province -> city -> district and store every entry."""
        rows = []
        for province_code, province in self.get_province().items():
            city_dict = self.get_city(province)
            rows.append(self._row(province, 'PROVINCE', province_code, 0))
            print('{}----------------------------------省------------------------------------------{}\n'.format(province_code, province))

            for city_code, city in city_dict.items():
                area_dict = self.get_area(province, city)
                print('{}*******************市****************{}\n'.format(city_code, city))

                # A "directly administered" placeholder is not a real city:
                # it is stored under the province's code, and its districts
                # hang off the province instead of off the placeholder.
                if city in self.direct_county_units:
                    owner_code = province_code
                else:
                    owner_code = city_code

                rows.append(self._row(city, 'CITY', owner_code, province_code))
                for area_code, area in area_dict.items():
                    print('{}-区-{}\n'.format(area_code, area))
                    rows.append(self._row(area, 'DISTRICT', area_code, owner_code))

        # Drop exact duplicates while keeping insertion order.
        rows = list(dict.fromkeys(rows))
        print(str(rows))

        # Empty the table and reload it in one transaction.
        self.connect_mysql('delete from area_config', rows)

    def _row(self, name, level, code, parent_code):
        """Build one ``(sql, params)`` insert for ``connect_mysql``."""
        return (self.insert_sql, (name, level, code, parent_code))

    def _select_json(self, body):
        """POST a raw form body to ``selectJson`` and return the parsed JSON."""
        headers = dict(self.headers)
        headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=utf-8'
        # The body is sent as raw UTF-8 (not percent-encoded), e.g.
        # 'shengji=江苏省(苏)', exactly as the original request did.
        response = requests.post(self.select_url, data=body.encode('UTF-8'),
                                 headers=headers, timeout=self.request_timeout)
        response.raise_for_status()
        return json.loads(response.content)

    def get_province(self):
        """Return the provinces embedded in the landing page as ``{code: name}``.

        Raises:
            ValueError: if no ``<script>`` containing ``var json = ...;`` exists.
        """
        # Non-greedy up to the first ';' -- assumes the JSON payload itself
        # contains no semicolon.
        pattern = re.compile(r'var json = (.*?);', re.MULTILINE | re.DOTALL)
        # ``string=`` is the current name of BeautifulSoup's ``text=`` filter.
        script = self.soup.find('script', string=pattern)
        match = pattern.search(script.text) if script is not None else None
        if match is None:
            raise ValueError('province JSON ("var json = ...;") not found in page')
        return {
            record['quHuaDaiMa']: record['shengji']
            for record in json.loads(match.group(1))
        }

    def get_city(self, shengji):
        """Return the prefecture-level entries of province ``shengji`` as ``{code: name}``."""
        records = self._select_json('shengji=' + shengji)
        return {record['quHuaDaiMa']: record['diji'] for record in records}

    def get_area(self, shengji, diji):
        """Return the county-level entries of city ``diji`` in ``shengji`` as ``{code: name}``."""
        records = self._select_json('shengji=' + shengji + '&diji=' + diji)
        return {record['quHuaDaiMa']: record['xianji'] for record in records}

    def connect_mysql(self, empty_sql, sql_list):
        """Run ``empty_sql`` and then every statement of ``sql_list`` atomically.

        Args:
            empty_sql: statement executed first (used to clear the table).
            sql_list: iterable whose items are either a plain SQL string or a
                ``(sql, params)`` pair executed as a parameterised query.

        On any error the transaction is rolled back and the error is printed
        rather than raised, matching the original best-effort behaviour.
        """
        cursor = self.db.cursor()
        try:
            cursor.execute(empty_sql)
            for statement in sql_list:
                if isinstance(statement, str):
                    cursor.execute(statement)
                else:
                    sql, params = statement
                    cursor.execute(sql, params)
            # Commit only after every statement succeeded, and before
            # reporting success.
            self.db.commit()
            print('更新所有数据完成')
        except Exception as e:
            print('更新失败')
            print(e)
            self.db.rollback()
        finally:
            cursor.close()


if __name__ == '__main__':
    allAreaDataNew()
代码执行成功后，就可以查到中国所有省市区啦。 特殊情况：“省直辖县级行政单位”和“自治区直辖县级行政单位”。 注意：部分省有特殊的“省直辖县级行政单位”或“自治区直辖县级行政单位”。