python统计图作息规律统计分析_400w条QQ空间爬虫，及好友网络分析

101 阅读 0 评论 67 点赞

我是靠谱客的博主谦让眼神，最近开发中收集的这篇文章主要介绍python统计图作息规律统计分析_400w条QQ空间爬虫，及好友网络分析，觉得挺不错的，现在分享给大家，希望可以做个参考。

概述

学习机器学习算法，手上没有数据，于是决定自己动手，爬取QQ空间数据

本文参考文献

结果

只举例好友网络，说说时间，过多信息涉及好友隐私

数据

说明:为防止腾讯检测，随机丢掉一部分数据。

两代运行之后，大概有400w左右数据(Aliyun运行了大概8个小时)

好友网络

整张网络结构：过大，根本看不出什么

以某一好友为例，为保护隐私，数据标签不显示

hops表示最多经过几个节点(人)

发说说时间

随机抽取10w数据，统计。可以从下面(特别是小时统计)看出，在网络中的作息规律。

随机抽取100w数据，然后从中选出17年的80223条数据，单独分析每个月的情况

10w数据随机选择，然后17年只有1-9月的数据

按年统计

按月统计

基本上平均

按天统计

每小时统计图

部分源码

python登陆QQ空间

version 1 将自己的账号密码写入一个文本(userinfo.ini)，有时候操作过快会提示错误

userinfo.ini内容

[qq_info]

qq_number=xxx(你的qq号)

qq_password=(你的密码)

#coding:utf-8

from selenium import webdriver

import requests

import time

import os

from urllib import parse

import configparser

class Spider(object):
    """Log in to QQ Zone with account/password via Selenium and compute ``g_tk``.

    The QQ number and password are read from the ``[qq_info]`` section of
    ``userinfo.ini``.  After :meth:`login` succeeds:

    * ``self.cookies`` holds the browser cookies as one ``name=value;`` string,
    * ``self.headers['Cookie']`` carries that same string,
    * ``self.g_tk`` holds the token derived from the ``p_skey`` cookie.
    """

    def __init__(self):
        """Open Firefox on the QQ Zone page and load the credentials."""
        self.web = webdriver.Firefox()
        self.web.get('https://user.qzone.qq.com')

        config = configparser.ConfigParser(allow_no_value=False)
        config.read('userinfo.ini')
        self.__username = config.get('qq_info', 'qq_number')
        # login() types the password into the form, so it must be loaded here.
        self.__password = config.get('qq_info', 'qq_password')

        self.headers = {
            'host': 'h5.qzone.qq.com',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.8',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:55.0) Gecko/20100101 Firefox/55.0',
            'connection': 'keep-alive',
        }
        self.req = requests.Session()
        # Cookies are kept as a "name=value;" string (see login/get_g_tk).
        self.cookies = ''

    def login(self):
        """Submit the password login form and capture cookies and ``g_tk``.

        The browser is always closed when this method returns, even if a
        step fails.

        Raises:
            ValueError: if no ``p_skey`` cookie is present after logging in.
        """
        try:
            # switch_to.frame / find_element(by, value) work on old and new
            # Selenium releases; the *_by_id helpers were removed in Selenium 4.
            self.web.switch_to.frame('login_frame')
            self.web.find_element('id', 'switcher_plogin').click()
            time.sleep(1)

            self.web.find_element('id', 'u').send_keys(self.__username)
            self.web.find_element('id', 'p').send_keys(self.__password)
            # The form has to be submitted, otherwise no session cookies exist.
            self.web.find_element('id', 'login_button').click()
            time.sleep(5)  # too short a delay gets detected and raises an error

            self.web.get('https://user.qzone.qq.com/{}'.format(self.__username))
            self.cookies = ''.join(
                '{}={};'.format(item['name'], item['value'])
                for item in self.web.get_cookies()
            )
            self.get_g_tk()
            self.headers['Cookie'] = self.cookies
        finally:
            self.web.quit()

    def get_g_tk(self):
        """Compute ``self.g_tk`` from the ``p_skey`` value in ``self.cookies``.

        The token is a shift-and-add hash (seed 5381, ``h += (h << 5) + ord(c)``)
        of the ``p_skey`` value, masked to 31 bits.  The result is printed and
        stored in ``self.g_tk``.

        Raises:
            ValueError: if ``self.cookies`` contains no ``p_skey=`` entry.
        """
        marker = 'p_skey='
        start = self.cookies.find(marker)
        if start == -1:
            # Without this guard the slice below would hash unrelated text.
            raise ValueError('p_skey cookie not found; login probably failed')
        start += len(marker)
        end = self.cookies.find(';', start)
        # A value at the very end of the string may lack the trailing ';'.
        p_skey = self.cookies[start:] if end == -1 else self.cookies[start:end]

        h = 5381
        for ch in p_skey:
            h += (h << 5) + ord(ch)
        self.g_tk = h & 2147483647
        print('g_tk', self.g_tk)

if __name__ == '__main__':
    # Script entry point: open the browser, then run the password login flow.
    spider = Spider()
    spider.login()

version 2

自己扫码登陆。注意需要等待控制台提示再操作，不然selenium抓不到窗口

#coding:utf-8

import os
import time
from urllib import parse

import requests
from selenium import webdriver

class Spider(object):
    """Log in to QQ Zone by QR-code scan via Selenium and compute ``g_tk``.

    After :meth:`login` succeeds:

    * ``self.cookies`` holds the browser cookies as one ``name=value;`` string,
    * ``self.headers['Cookie']`` carries that same string,
    * ``self.g_tk`` holds the token derived from the ``p_skey`` cookie.
    """

    def __init__(self):
        """Open Firefox on the QQ Zone page and set up the request headers."""
        self.web = webdriver.Firefox()
        self.web.get('https://user.qzone.qq.com')
        self.__username = '这里填你的QQ号'  # used by the later steps

        self.headers = {
            'host': 'h5.qzone.qq.com',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.8',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:55.0) Gecko/20100101 Firefox/55.0',
            'connection': 'keep-alive',
        }
        self.req = requests.Session()
        # Cookies are kept as a "name=value;" string (see login/get_g_tk).
        self.cookies = ''

    def get_g_tk(self):
        """Compute ``self.g_tk`` from the ``p_skey`` value in ``self.cookies``.

        The token is a shift-and-add hash (seed 5381, ``h += (h << 5) + ord(c)``)
        of the ``p_skey`` value, masked to 31 bits.  The result is printed and
        stored in ``self.g_tk``.

        Raises:
            ValueError: if ``self.cookies`` contains no ``p_skey=`` entry.
        """
        marker = 'p_skey='
        start = self.cookies.find(marker)
        if start == -1:
            # Without this guard the slice below would hash unrelated text.
            raise ValueError('p_skey cookie not found; login probably failed')
        start += len(marker)
        end = self.cookies.find(';', start)
        # A value at the very end of the string may lack the trailing ';'.
        p_skey = self.cookies[start:] if end == -1 else self.cookies[start:end]

        h = 5381
        for ch in p_skey:
            h += (h << 5) + ord(ch)
        self.g_tk = h & 2147483647
        print('g_tk', self.g_tk)

    def login(self, poll_interval=0.5):
        """Wait for the user to scan the QR code, then capture cookies and ``g_tk``.

        The page title is polled until it contains ``'http://'``, which this
        script treats as the sign that the scan succeeded.  The browser is
        always closed when this method returns, even if a step fails.

        Args:
            poll_interval: seconds to sleep between two title checks, so the
                wait does not spin at full speed against the browser driver.

        Raises:
            ValueError: if no ``p_skey`` cookie is present after logging in.
        """
        print('请扫码登陆')
        try:
            while 'http://' not in self.web.title:
                time.sleep(poll_interval)
            print('扫码登陆成功')
            time.sleep(2)

            self.web.get('https://user.qzone.qq.com/{}'.format(self.__username))
            self.cookies = ''.join(
                '{}={};'.format(item['name'], item['value'])
                for item in self.web.get_cookies()
            )
            self.get_g_tk()
            self.headers['Cookie'] = self.cookies
        finally:
            self.web.quit()

if __name__ == '__main__':
    # Script entry point: open the browser, then run the login flow.
    spider = Spider()
    spider.login()

networkx 分析

networkx分析大体第一步都是构建网络，在网络中分别增加节点(node)和边(edge)，networkx支持批量添加。该部分和自己数据集的方式有关。

下面主要是网络构建成功之后的分析部分

# -*- coding: utf-8 -*-

"""

Created on Wed Oct 4 22:23:27 2017

@author: x

"""

import networkx as nx

import matplotlib.pyplot as plt

import operator

# basic information of a network

def basic_info(G):
    """Write summary statistics of graph ``G`` to ``basic_info.txt``.

    The report (UTF-8 text, overwritten on every call) contains the node
    count, edge count, weighted edge sum, the weakly and strongly connected
    components, the average shortest path length of ``G`` itself, and the
    average clustering coefficient and average shortest path length of its
    undirected version.

    Args:
        G: a NetworkX graph; the connected-component calls and the report
            labels treat it as directed.

    NOTE(review): ``nx.average_shortest_path_length`` is called on the whole
    graph; presumably the caller guarantees it is connected -- confirm.
    """
    # ``with`` closes the file even when one of the graph queries raises.
    with open('basic_info.txt', 'w', encoding='utf-8') as f:
        f.write('网络节点数：')
        f.write(str(G.number_of_nodes()) + '\n')
        f.write('网络边数：')
        f.write(str(G.size()) + '\n')
        f.write('网络边加权和：')
        f.write(str(G.size(weight='weight')) + '\n')

        scc = nx.strongly_connected_components(G)  # strongly connected components
        wcc = nx.weakly_connected_components(G)  # weakly connected components

        # NOTE(review): the original indentation was lost; one component per
        # line here, and one comma-separated line for the strong components,
        # is inferred from the separators each loop writes -- confirm.
        print("弱连接: ")
        f.write('弱连接:' + '\n')
        for component in wcc:
            f.write(str(component))
            f.write('\n')

        print("强连接: ")
        f.write('强连接:' + '\n')
        for component in scc:
            f.write(str(component) + ',')
        f.write('\n')

        f.write('有向图平均路径长度：')
        f.write(str(nx.average_shortest_path_length(G)) + '\n')

        # The remaining metrics are computed on the undirected version.
        undirected = G.to_undirected()
        f.write('平均聚类系数：')
        f.write(str(nx.average_clustering(undirected)) + '\n')
        f.write('平均路径长度：')
        f.write(str(nx.average_shortest_path_length(undirected)) + '\n')

def node_exist(G, node):
    """Return ``True`` if ``G.has_node(node)`` is truthy, otherwise ``False``."""
    return bool(G.has_node(node))

def draw_ego_graph(G, character, hops=1, show_lables=True):
    """Draw the ego network around ``character`` in a 12x12 matplotlib figure.

    Args:
        G: the NetworkX graph to extract the ego network from
            (the original note says: expecting a graph_from-gdf).
        character: the centre node of the ego network.
        hops: radius passed to ``nx.ego_graph``.
        show_lables: when true, node labels are drawn as well.  The spelling
            is kept because callers may pass it by keyword.
    """
    title = "%s的%s代好友网络" % (character, hops)

    # Get the ego graph and node positions
    ego = nx.ego_graph(G, character, hops)
    pos = nx.spring_layout(ego)

    plt.figure(figsize=(12, 12))
    plt.axis('off')

    # Draw.  ``with_lables`` is not a parameter of these functions and
    # ``cmp`` was a typo for ``cmap``; current NetworkX rejects unknown
    # keyword arguments, so only valid ones are passed.
    nx.draw_networkx_edges(ego, pos, alpha=0.8)
    nx.draw_networkx_nodes(ego, pos, node_size=50, cmap=plt.cm.hot)
    if show_lables:
        nx.draw_networkx_labels(ego, pos)

    plt.title('={}'.format(title))
    plt.show()

#find top 10 key people

def key_people(G):
    """Print the ten nodes of ``G`` with the highest degree centrality.

    The centrality of every node is also stored on the graph as the node
    attribute ``'centrality'``.  Each output line has the form
    ``"<node> : <centrality to 3 decimals>"``.
    """
    centrality = nx.degree_centrality(G)
    # Keyword arguments are required here: NetworkX 2.0 swapped the positional
    # order of ``values`` and ``name``; both names exist in 1.x and 2.x+.
    nx.set_node_attributes(G, values=centrality, name='centrality')

    ranked = sorted(centrality.items(), key=operator.itemgetter(1), reverse=True)
    for item in ranked[:10]:
        print("%s : %0.3f" % item)

key_people()函数用来寻找在网络中，对网络连通最大的节点。数据可以反映很多现实问题。

draw_ego_graph()用来寻找子网络，单张网络往往非常大，(测试中，本数据集整个节点20w),研究并可视化子网络

最后

以上就是谦让眼神为你收集整理的python统计图作息规律统计分析_400w条QQ空间爬虫，及好友网络分析的全部内容，希望文章能够帮你解决python统计图作息规律统计分析_400w条QQ空间爬虫，及好友网络分析所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错，欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供，作为学习参考使用，或来自网络收集整理，版权属于原作者所有。

本文分类：python统计图作息规律统计分析
浏览次数：101 次浏览
发布日期：2023-10-09 23:45:16
本文链接：https://www.kaopuke.com/article/k-p-k_14_uzokf0_13_z_14_y.html

python统计图作息规律统计分析_400w条QQ空间爬虫，及好友网络分析

概述

最后

评论列表共有 0 条评论

发表评论取消回复

python统计图 作息规律统计分析_400w条QQ空间爬虫，及好友网络分析

概述

最后

相关文章

评论列表共有 0 条评论

发表评论 取消回复

python统计图作息规律统计分析_400w条QQ空间爬虫，及好友网络分析

发表评论取消回复