from bs4 import BeautifulSoup
import re
import sqlite3
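
# Open the OneNav SQLite database.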
conn = sqlite3.connect('onenav.db3')
print("数据库打开成功")
c = conn.cursor()
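# Clear existing links and categories and reset their AUTOINCREMENT counters.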
c.execute('''delete from on_links;''')
c.execute('''update sqlite_sequence SET seq = 0 where name ='on_links';''')
c.execute('''delete from on_categorys;''')
c.execute('''update sqlite_sequence SET seq = 0 where name ='on_categorys';''')
conn.commit()
print("数据清空完毕")
with open('555.txt', encoding='utf-8') as f:
    html = f.read()
soup = BeautifulSoup(html, 'html.parser')
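
# Each 'site-main-li' div wraps one category and its list of links.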
div_tags = soup.find_all('div', {'class': 'site-main-li'})
category_id = 0
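# Skip the first and last divs; the ones in between are the category sections.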
for div_tag in div_tags[1:-1]:
    category_id += 1
    category = div_tag.find('div', {'class': 'site-tit'}).get_text().strip()
    links = div_tag.find_all('div', {'class': 'list siteList'})
    print(category)
    c.execute('''insert into on_categorys (`id`, `name`) values (?, ?);''', (category_id, category))
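    # Walk the entries in this category; only those with a data-id attribute are real links.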
    for link in links:
        link_id = link.get('data-id')
        if link_id:
            url = link['data-links']
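            # Strip any leftover HTML tags from the title text.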
            title = re.sub(r'<.*?>', '', link.find('p', class_="title").get_text().strip())
            print('\t'.join([url, title]))
            c.execute('''insert into on_links (`fid`, `title`, `url`) values (?, ?, ?);''', (category_id, title, url))
    print('---')
conn.commit()
conn.close()