西部数码网站备份,asp怎么新建网站,校园网站建设需要哪些,院校网站建设对比分析实训报总结文章目录 前言项目介绍技术介绍功能介绍核心代码数据库参考 系统效果图 前言
文章底部名片，获取项目的完整演示视频，免费解答技术疑问
项目介绍 随着数字化阅读的普及，豆瓣电子图书推荐系统应运而生，旨在为用户提供个性化的阅… 文章目录 前言项目介绍技术介绍功能介绍核心代码数据库参考 系统效果图 前言
文章底部名片，获取项目的完整演示视频，免费解答技术疑问
项目介绍 随着数字化阅读的普及，豆瓣电子图书推荐系统应运而生，旨在为用户提供个性化的阅读体验。基于Hadoop的强大数据处理能力，该系统能够有效处理海量用户数据和书籍信息，通过复杂的算法模型为用户推荐高质量的内容。管理员功能涵盖用户管理、豆瓣高分管理等，确保了平台的高效运营。用户个人中心则提供修改密码、我的发布等服务，增强了用户体验。整体上，该推荐系统不仅提升了用户的阅读便利性，也促进了知识分享与文化交流。 根据本系统的基本设计思路，本系统在设计方面前台采用了java技术等进行基本的页面设计，后台数据库采用MySQL。本系统的实现为豆瓣电子图书推荐系统的运行打下了基础，为豆瓣电子图书推荐提供良好的条件。 最后，我们通过需求分析、测试调整，与豆瓣电子图书管理的实际需求相结合，设计实现了豆瓣电子图书推荐系统。
技术介绍
开发语言：Java 框架：springboot JDK版本：JDK1.8 服务器：tomcat7 数据库：mysql 数据库工具：Navicat11 开发软件：eclipse/myeclipse/idea Maven包：Maven
功能介绍
系统的功能设计是整个系统的运行基础，是一个把设计需求替换成以计算机系统的形式表示出来的过程。通过对豆瓣电子图书推荐系统的调查、分析和研究，得出了该系统的总体规划，这是开发设计系统的初步核心。如下图所示： 在这里插入图片描述
图4-1总体规划结构图
核心代码
# -*- coding: utf-8 -*-
# Data-scraping spider module.
import datetime as formattime
import json
import os
import platform
import random
import re
import time
import urllib
from datetime import datetime, timedelta
from urllib.parse import urlparse

import emoji
import numpy as np
import pandas as pd
import pymssql
import pymysql
import requests
import scrapy
from scrapy.http import TextResponse
from selenium import webdriver
from selenium.webdriver import ChromeOptions, ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from sqlalchemy import create_engine

from ..items import DoubanItem
# 豆瓣高分
class DoubanSpider(scrapy.Spider):
    """Spider that scrapes highly rated e-books from read.douban.com.

    The listing endpoint (``spiderUrl``) is queried with a JSON POST body
    containing a GraphQL query; every work in the response is followed to
    its detail endpoint to collect author, publisher, tags, table of
    contents and rating.

    When the spider is not started in realtime mode and a staging table
    named ``l908648a_douban`` already exists, no crawl is performed;
    instead the staging rows are copied into the ``douban`` table.

    NOTE(review): this block was reconstructed from a listing whose
    operators, quotes and indentation had been stripped; confirm the
    restored literals against the original project before relying on them.
    """

    name = 'doubanSpider'
    spiderUrl = 'https://read.douban.com/j/kind/'
    start_urls = spiderUrl.split(";")
    protocol = ''
    hostname = ''
    realtime = False

    # Staging table checked before crawling and read by temp_data().
    _CACHE_TABLE = 'l908648a_douban'

    # GraphQL document sent as the "query" member of the listing request.
    _WORKS_QUERY = """
    query getFilterWorksList($works_ids: [ID!]) {
      worksList(worksIds: $works_ids) {
        id
        isOrigin
        isEssay
        title
        cover(useSmall: false)
        url
        isBundle
        coverLabel(preferVip: true)
        url
        title
        author {
          name
          url
        }
        origAuthor {
          name
          url
        }
        translator {
          name
          url
        }
        abstract
        authorHighlight
        editorHighlight
        isOrigin
        kinds {
          name @skip(if: true)
          shortName @include(if: true)
          id
        }
        ... on WorksBase @include(if: true) {
          wordCount
          wordCountUnit
        }
        ... on WorksBase @include(if: false) {
          inLibraryCount
        }
        ... on WorksBase @include(if: false) {
          isEssay
          ... on EssayWorks {
            favorCount
          }
          averageRating
          ratingCount
          url
          isColumn
          isFinished
        }
        ... on EbookWorks @include(if: true) {
          ... on EbookWorks {
            book {
              url
              averageRating
              ratingCount
            }
          }
        }
        ... on WorksBase @include(if: false) {
          isColumn
          isEssay
          onSaleTime
          ... on ColumnWorks {
            updateTime
          }
        }
        ... on WorksBase @include(if: true) {
          isColumn
          ... on ColumnWorks {
            isFinished
          }
        }
        ... on EssayWorks {
          essayActivityData {
            title
            uri
            tag {
              name
              color
              background
              icon2x
              icon3x
              iconSize {
                height
              }
              iconPosition {
                x y
              }
            }
          }
        }
        highlightTags {
          name
        }
        ... on WorksBase @include(if: false) {
          fanfiction {
            tags {
              id
              name
              url
            }
          }
        }
        ... on WorksBase {
          copyrightInfo {
            newlyAdapted
            newlyPublished
            adaptedName
            publishedName
          }
        }
        isInLibrary
        ... on WorksBase @include(if: false) {
          fixedPrice
          salesPrice
          isRebate
        }
        ... on EbookWorks {
          fixedPrice
          salesPrice
          isRebate
        }
        ... on WorksBase @include(if: true) {
          ... on EbookWorks {
            id
            isPurchased
            isInWishlist
          }
        }
        ... on WorksBase @include(if: false) {
          fanfiction {
            fandoms {
              title
              url
            }
          }
        }
        ... on WorksBase @include(if: false) {
          fanfiction {
            kudoCount
          }
        }
      }
    }
    """

    def __init__(self, realtime=False, *args, **kwargs):
        """Create the spider.

        Args:
            realtime: Spider argument; only the exact string ``'true'``
                enables realtime mode (any other value disables it).
        """
        super().__init__(*args, **kwargs)
        self.realtime = realtime == 'true'

    def _load_from_cache(self):
        """Copy staged rows instead of crawling when a staging table exists.

        Returns:
            True when the staging table was found and ``temp_data`` was
            run, in which case the caller should stop; False otherwise.
        """
        plat = platform.system().lower()
        if self.realtime or plat not in ('linux', 'windows'):
            return False
        connect = self.db_connect()
        try:
            cursor = connect.cursor()
            try:
                cached = self.table_exists(cursor, self._CACHE_TABLE) == 1
            finally:
                cursor.close()
        finally:
            # Always release the connection, including when the table is absent.
            connect.close()
        if cached:
            self.temp_data()
        return cached

    def start_requests(self):
        """Yield the POST requests for the listing pages."""
        if self._load_from_cache():
            return
        data_json = {
            "sort": "book_rating",
            "page": 1,
            "kind": 100,
            "query": self._WORKS_QUERY,
            "variables": {},
        }
        pageNum = 1 + 1
        for page in range(1, pageNum):
            data_json["page"] = page
            raw = json.dumps(data_json)
            yield scrapy.FormRequest(
                url=self.spiderUrl,
                body=raw,
                method='POST',
                callback=self.parse,
                headers={'Content-Type': 'application/json'},
                dont_filter=True,
            )

    # List parsing
    def parse(self, response):
        """Parse a listing response and request each work's detail endpoint."""
        _url = urlparse(self.spiderUrl)
        self.protocol = _url.scheme
        self.hostname = _url.netloc
        if self._load_from_cache():
            return
        print("response:", response.text)
        data = json.loads(response.text)
        try:
            works = data["list"]
        except (KeyError, TypeError):
            # No listing in the payload: nothing to follow.
            works = []
        for item in works:
            fields = DoubanItem()
            fields["bookname"] = item["title"]
            fields["cover"] = item["cover"]
            fields["laiyuan"] = "https://read.douban.com" + item["url"]
            fields["wordcount"] = int(item["wordCount"])
            fields["salesprice"] = float(item["salesPrice"])
            yield scrapy.Request(
                url="https://read.douban.com/j" + item["url"],
                meta={"fields": fields},
                callback=self.detail_parse,
                dont_filter=True,
            )

    # Detail parsing
    def detail_parse(self, response):
        """Fill the item carried in ``response.meta`` from the detail JSON.

        Optional members that are missing from the payload are left unset.
        """
        print("detail_parse:", response.text)
        fields = response.meta['fields']
        data = json.loads(response.text)
        try:
            fields["author"] = data["author"]
        except KeyError:
            fields["author"] = data["original_author"]
        try:
            fields["chuban"] = data["publisher"]
        except (KeyError, TypeError):
            pass
        try:
            fields["tags"] = ",".join(i["tag"] for i in data["tags"])
        except (KeyError, TypeError):
            pass
        try:
            fields["mulu"] = ";".join(i["title"] for i in data["table_of_contents"])
        except (KeyError, TypeError):
            pass
        try:
            fields["rating"] = data["book_average_rating"]
        except (KeyError, TypeError):
            pass
        return fields

    # Data cleaning
    def pandas_filter(self):
        """Load up to 50 rows of ``douban`` and apply basic cleaning steps.

        Returns:
            The DataFrame after de-duplication, dropping rows with missing
            values and filling any remaining gaps with '暂无'.
        """
        # NOTE(review): credentials are hard-coded in this URL.
        engine = create_engine('mysql+pymysql://root:123456@localhost/spiderl908648a?charset=UTF8MB4')
        df = pd.read_sql('select * from douban limit 50', con=engine)

        # Drop duplicated rows.
        df = df.drop_duplicates()
        # Drop rows containing missing values.
        df = df.dropna()
        # Fill remaining missing values.
        df = df.fillna(value='暂无')

        # The remaining steps operate on synthetic data only and are kept
        # as demonstrations of outlier filtering; their results are unused.

        # Keep values between 100 and 800.
        a = np.random.randint(0, 1000, size=200)
        cond = (a <= 800) & (a >= 100)
        a[cond]

        # 3-sigma outliers of a standard normal sample (sigma is 1 here).
        b = np.random.randn(100000)
        cond = np.abs(b) > 3 * 1
        b[cond]

        # Rows of a normal sample with any column above 3 standard deviations.
        df2 = pd.DataFrame(data=np.random.randn(10000, 3))
        cond = (df2 > 3 * df2.std()).any(axis=1)
        index = df2[cond].index
        df2.drop(labels=index, axis=0)

        return df

    # Strip HTML tags
    def remove_html(self, html):
        """Return ``html`` with tags removed and surrounding whitespace stripped.

        ``None`` yields an empty string.
        """
        if html is None:
            return ''
        pattern = re.compile(r'<[^>]+>', re.S)
        return pattern.sub('', html).strip()

    # Database connection
    def db_connect(self):
        """Open a database connection configured from the Scrapy settings.

        Reads TYPE, HOST, PORT, USER, PASSWORD and DATABASE; a
        ``databaseName`` attribute on the spider, when present, overrides
        DATABASE. TYPE ``'mysql'`` uses pymysql, anything else pymssql.
        """
        db_type = self.settings.get('TYPE', 'mysql')
        host = self.settings.get('HOST', 'localhost')
        port = int(self.settings.get('PORT', 3306))
        user = self.settings.get('USER', 'root')
        password = self.settings.get('PASSWORD', '123456')

        try:
            database = self.databaseName
        except AttributeError:
            database = self.settings.get('DATABASE', '')

        if db_type == 'mysql':
            connect = pymysql.connect(host=host, port=port, db=database, user=user, passwd=password, charset='utf8')
        else:
            connect = pymssql.connect(host=host, user=user, password=password, database=database)
        return connect

    # Check whether a table exists
    def table_exists(self, cursor, table_name):
        """Return 1 if ``table_name`` appears in ``show tables``, else 0."""
        cursor.execute("show tables;")
        table_names = set()
        for row in cursor.fetchall():
            # Rows are sequences by default, mappings with dict-style cursors.
            values = row.values() if isinstance(row, dict) else row
            table_names.update(str(value) for value in values)
        return 1 if table_name in table_names else 0

    # Cached data source
    def temp_data(self):
        """Copy staging rows whose id is not yet present into ``douban``."""
        connect = self.db_connect()
        try:
            cursor = connect.cursor()
            sql = '''
                insert into `douban`(
                    id, bookname, author, cover, laiyuan, wordcount,
                    salesprice, chuban, tags, mulu, rating
                )
                select
                    id, bookname, author, cover, laiyuan, wordcount,
                    salesprice, chuban, tags, mulu, rating
                from `l908648a_douban`
                where (not exists (
                    select
                        id, bookname, author, cover, laiyuan, wordcount,
                        salesprice, chuban, tags, mulu, rating
                    from `douban`
                    where `douban`.id = `l908648a_douban`.id
                ))
            '''
            cursor.execute(sql)
            connect.commit()
        finally:
            connect.close()
数据库参考
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Dumping data for table `discussdouban`
--

LOCK TABLES `discussdouban` WRITE;
/*!40000 ALTER TABLE `discussdouban` DISABLE KEYS */;
/*!40000 ALTER TABLE `discussdouban` ENABLE KEYS */;
UNLOCK TABLES;

--
-- Table structure for table `douban`
--

DROP TABLE IF EXISTS `douban`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `douban` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键',
  `addtime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `bookname` varchar(200) DEFAULT NULL COMMENT '书名',
  `author` varchar(200) DEFAULT NULL COMMENT '作者',
  `cover` longtext COMMENT '封面',
  `laiyuan` varchar(200) DEFAULT NULL COMMENT '来源',
  `wordcount` int(11) DEFAULT NULL COMMENT '字数',
  `salesprice` double DEFAULT NULL COMMENT '价格',
  `chuban` varchar(200) DEFAULT NULL COMMENT '出版社',
  `tags` varchar(200) DEFAULT NULL COMMENT '标签',
  `mulu` longtext COMMENT '章节目录',
  `rating` double DEFAULT NULL COMMENT '评分',
  `thumbsupnum` int(11) DEFAULT '0' COMMENT '赞',
  `crazilynum` int(11) DEFAULT '0' COMMENT '踩',
  `clicktime` datetime DEFAULT NULL COMMENT '最近点击时间',
  `clicknum` int(11) DEFAULT '0' COMMENT '点击次数',
  `discussnum` int(11) DEFAULT '0' COMMENT '评论数',
  `storeupnum` int(11) DEFAULT '0' COMMENT '收藏数',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=29 DEFAULT CHARSET=utf8 COMMENT='豆瓣高分';
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Dumping data for table `douban`
--

LOCK TABLES `douban` WRITE;
/*!40000 ALTER TABLE `douban` DISABLE KEYS */;
INSERT INTO `douban` VALUES
(21,'2024-03-14 13:36:40','书名1','作者1','upload/douban_cover1.jpg,upload/douban_cover2.jpg,upload/douban_cover3.jpg','来源1',1,1,'出版社1','标签1','章节目录1',1,1,1,'2024-03-14 21:36:40',1,0,1),
(22,'2024-03-14 13:36:40','书名2','作者2','upload/douban_cover2.jpg,upload/douban_cover3.jpg,upload/douban_cover4.jpg','来源2',2,2,'出版社2','标签2','章节目录2',2,2,2,'2024-03-14 21:36:40',2,0,2),
(23,'2024-03-14 13:36:40','书名3','作者3','upload/douban_cover3.jpg,upload/douban_cover4.jpg,upload/douban_cover5.jpg','来源3',3,3,'出版社3','标签3','章节目录3',3,3,3,'2024-03-14 21:36:40',3,0,3),
(24,'2024-03-14 13:36:40','书名4','作者4','upload/douban_cover4.jpg,upload/douban_cover5.jpg,upload/douban_cover6.jpg','来源4',4,4,'出版社4','标签4','章节目录4',4,4,4,'2024-03-14 21:36:40',4,0,4),
(25,'2024-03-14 13:36:40','书名5','作者5','upload/douban_cover5.jpg,upload/douban_cover6.jpg,upload/douban_cover7.jpg','来源5',5,5,'出版社5','标签5','章节目录5',5,5,5,'2024-03-14 21:36:40',5,0,5),
(26,'2024-03-14 13:36:40','书名6','作者6','upload/douban_cover6.jpg,upload/douban_cover7.jpg,upload/douban_cover8.jpg','来源6',6,6,'出版社6','标签6','章节目录6',6,6,6,'2024-03-14 21:36:40',6,0,6),
(27,'2024-03-14 13:36:40','书名7','作者7','upload/douban_cover7.jpg,upload/douban_cover8.jpg,upload/douban_cover9.jpg','来源7',7,7,'出版社7','标签7','章节目录7',7,7,7,'2024-03-14 21:36:40',7,0,7),
(28,'2024-03-14 13:36:40','书名8','作者8','upload/douban_cover8.jpg,upload/douban_cover9.jpg,upload/douban_cover10.jpg','来源8',8,8,'出版社8','标签8','章节目录8',8,8,8,'2024-03-14 21:36:40',8,0,8);
/*!40000 ALTER TABLE `douban` ENABLE KEYS */;
UNLOCK TABLES;

--
-- Table structure for table `forum`
--

DROP TABLE IF EXISTS `forum`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `forum` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键',
  `addtime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `title` varchar(200) DEFAULT NULL COMMENT '帖子标题',
  `content` longtext NOT NULL COMMENT '帖子内容',
  `parentid` bigint(20) DEFAULT NULL COMMENT '父节点id',
  `userid` bigint(20) NOT NULL COMMENT '用户id',
  `username` varchar(200) DEFAULT NULL COMMENT '用户名',
  `avatarurl` longtext COMMENT '头像',
  `isdone` varchar(200) DEFAULT NULL COMMENT '状态',
  `istop` int(11) DEFAULT '0' COMMENT '是否置顶',
  `toptime` datetime DEFAULT NULL COMMENT '置顶时间',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=39 DEFAULT CHARSET=utf8 COMMENT='论坛交流';
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Dumping data for table `forum`
--

LOCK TABLES `forum` WRITE;
/*!40000 ALTER TABLE `forum` DISABLE KEYS */;
INSERT INTO `forum` VALUES
(31,'2024-03-14 13:36:40','帖子标题1','帖子内容1',0,1,'用户名1','upload/forum_avatarurl1.jpg,upload/forum_avatarurl2.jpg,upload/forum_avatarurl3.jpg','开放',0,'2024-03-14 21:36:40'),
(32,'2024-03-14 13:36:40','帖子标题2','帖子内容2',0,2,'用户名2','upload/forum_avatarurl2.jpg,upload/forum_avatarurl3.jpg,upload/forum_avatarurl4.jpg','开放',0,'2024-03-14 21:36:40'),
(33,'2024-03-14 13:36:40','帖子标题3','帖子内容3',0,3,'用户名3','upload/forum_avatarurl3.jpg,upload/forum_avatarurl4.jpg,upload/forum_avatarurl5.jpg','开放',0,'2024-03-14 21:36:40'),
(34,'2024-03-14 13:36:40','帖子标题4','帖子内容4',0,4,'用户名4','upload/forum_avatarurl4.jpg,upload/forum_avatarurl5.jpg,upload/forum_avatarurl6.jpg','开放',0,'2024-03-14 21:36:40'),
(35,'2024-03-14 13:36:40','帖子标题5','帖子内容5',0,5,'用户名5','upload/forum_avatarurl5.jpg,upload/forum_avatarurl6.jpg,upload/forum_avatarurl7.jpg','开放',0,'2024-03-14 21:36:40'),
(36,'2024-03-14 13:36:40','帖子标题6','帖子内容6',0,6,'用户名6','upload/forum_avatarurl6.jpg,upload/forum_avatarurl7.jpg,upload/forum_avatarurl8.jpg','开放',0,'2024-03-14 21:36:40'),
(37,'2024-03-14 13:36:40','帖子标题7','帖子内容7',0,7,'用户名7','upload/forum_avatarurl7.jpg,upload/forum_avatarurl8.jpg,upload/forum_avatarurl9.jpg','开放',0,'2024-03-14 21:36:40'),
(38,'2024-03-14 13:36:40','帖子标题8','帖子内容8',0,8,'用户名8','upload/forum_avatarurl8.jpg,upload/forum_avatarurl9.jpg,upload/forum_avatarurl10.jpg','开放',0,'2024-03-14 21:36:40');
/*!40000 ALTER TABLE `forum` ENABLE KEYS */;
UNLOCK TABLES;

--
-- Table structure for table `l908648a_douban`
--

DROP TABLE IF EXISTS `l908648a_douban`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `l908648a_douban` (
  `id` bigint(20) NOT NULL DEFAULT '0' COMMENT '主键',
  `addtime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `bookname` varchar(200) CHARACTER SET utf8 DEFAULT NULL COMMENT '书名',
  `author` varchar(200) CHARACTER SET utf8 DEFAULT NULL COMMENT '作者',
  `cover` longtext CHARACTER SET utf8 COMMENT '封面',
  `laiyuan` longtext CHARACTER SET utf8 COMMENT '来源',
  `wordcount` int(11) DEFAULT NULL COMMENT '字数',
  `salesprice` double DEFAULT NULL COMMENT '价格',
  `chuban` varchar(200) CHARACTER SET utf8 DEFAULT NULL COMMENT '出版社',
  `tags` varchar(200) CHARACTER SET utf8 DEFAULT NULL COMMENT '标签',
  `mulu` longtext CHARACTER SET utf8 COMMENT '章节目录',
  `rating` double DEFAULT NULL COMMENT '评分'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/*!40101 SET character_set_client = @saved_cs_client */;
系统效果图