104 lines
3.5 KiB
Python
104 lines
3.5 KiB
Python
import logging
|
|
import sqlite3
|
|
import threading
|
|
import time
|
|
|
|
|
|
class Scanner:
    """Load ``----``-separated records from a text file into a SQLite table.

    Each input line is split on ``"----"``. The last field is treated as the
    phone value and every preceding field as a QQ value; one row is inserted
    into the three-column table ``qq`` per QQ value. Lines with fewer than
    two fields, and inserts rejected with ``sqlite3.IntegrityError``, are
    counted as invalid.

    Attributes:
        logger: Root logger used for progress output.
        database_connection: Open ``sqlite3.Connection`` or ``None``.
        database_name: Path handed to ``sqlite3.connect``.
        file_name: Path of the text file to load.
        file_rows: Number of lines counted in ``file_name``.
        handle_total: Number of handled items (insert attempts plus
            malformed lines).
        handle_invalid: Number of malformed lines and rejected inserts.
        handle_queue: Insert attempts since the last commit.
        cancel_print_insertion_speed: Callable that stops the progress
            reporter, or ``None`` when no reporter has been started.
    """

    # Number of insert attempts between two intermediate commits.
    COMMIT_BATCH_SIZE = 400000

    def __init__(self, database_name, file_name):
        """Configure root logging and reset all counters.

        Args:
            database_name: Path of the SQLite database file.
            file_name: Path of the text file to import.
        """
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s %(levelname)s %(message)s",
            datefmt='%Y-%m-%d %H:%M:%S',
        )
        self.logger = logging.getLogger()
        self.database_connection = None
        self.database_name = database_name
        self.file_name = file_name
        self.file_rows = 0
        self.handle_total = 0
        self.handle_invalid = 0
        self.handle_queue = 0
        self.cancel_print_insertion_speed = None

    def connect_database(self):
        """Open the SQLite connection for ``self.database_name``."""
        self.database_connection = sqlite3.connect(self.database_name)

    def close_database(self):
        """Close the SQLite connection if one is open.

        Safe to call repeatedly or before ``connect_database``; afterwards
        ``self.database_connection`` is ``None``.
        """
        connection = self.database_connection
        if connection is None:
            return
        # Drop the reference first so a failing close() cannot leave a
        # half-closed connection behind for a later call.
        self.database_connection = None
        connection.close()

    def insert_qq_and_phone(self, id, qq, phone):
        """Insert one ``(id, qq, phone)`` row into table ``qq``.

        An ``sqlite3.IntegrityError`` is swallowed and counted in
        ``handle_invalid``; any other exception propagates. In every case
        ``handle_total`` and ``handle_queue`` are incremented.

        Args:
            id: Value for the first column.
            qq: Value for the second column.
            phone: Value for the third column.
        """
        cursor = self.database_connection.cursor()
        try:
            cursor.execute("INSERT INTO qq VALUES (?, ?, ?);", (id, qq, phone))
        except sqlite3.IntegrityError:
            self.handle_invalid += 1
        finally:
            cursor.close()
            self.handle_total += 1
            self.handle_queue += 1

    def start_insertion_speed(self):
        """Start a background thread that logs throughput once per second.

        Ticks during which ``handle_total`` did not change are not logged.

        Returns:
            A zero-argument callable that stops the reporter thread.
        """
        stop_event = threading.Event()

        def print_insertion_speed():
            previous_total = self.handle_total
            # Event.wait returns False on timeout, i.e. once per second
            # until the event is set by the returned callable.
            while not stop_event.wait(1):
                # Read the shared counter once so that rate, progress and
                # the estimate in one log line all refer to the same value.
                current_total = self.handle_total
                handled = current_total - previous_total
                if handled == 0:
                    continue
                self.logger.info("{}/s, {}/{} progress, {} rows are invalid, {} seconds left".format(
                    handled,
                    current_total,
                    self.file_rows,
                    self.handle_invalid,
                    (self.file_rows - current_total) / handled,
                ))
                previous_total = current_total

        # Daemon thread: a reporter that was never cancelled must not keep
        # the interpreter alive after the main thread has finished.
        threading.Thread(target=print_insertion_speed, daemon=True).start()
        return stop_event.set

    def _count_file_rows(self):
        """Count the lines of ``self.file_name`` into ``self.file_rows``."""
        self.logger.info("start scanning file lines")
        start_time = time.monotonic()
        with open(self.file_name) as file:
            self.file_rows = sum(1 for _ in file)
        end_time = time.monotonic()
        self.logger.info("scan completed, there are a total of {} lines, and it taken {} seconds".format(
            self.file_rows,
            end_time - start_time,
        ))

    def _insert_file_rows(self):
        """Parse every line of ``self.file_name`` and insert its records."""
        with open(self.file_name) as file:
            for line in file:
                data = line.strip().split("----")
                if len(data) < 2:
                    # No separator on this line: nothing to insert.
                    self.handle_invalid += 1
                    self.handle_total += 1
                    continue
                phone = data[-1]
                for qq in data[:-1]:
                    # The running item counter doubles as the row id.
                    self.insert_qq_and_phone(self.handle_total, qq, phone)
                if self.handle_queue >= self.COMMIT_BATCH_SIZE:
                    self.database_connection.commit()
                    self.handle_queue = 0

    def start(self):
        """Run the whole import: count lines, insert rows, report, exit.

        The progress reporter is stopped and the database connection is
        closed even when loading fails; rows of the batch that was not yet
        committed are then not persisted.

        Raises:
            SystemExit: Always, after a successful import.
            OSError: If the input file cannot be opened.
            sqlite3.Error: For database failures other than
                ``sqlite3.IntegrityError``.
        """
        # Get the number of file rows
        self._count_file_rows()

        # Insert QQ and phone numbers
        self.connect_database()
        try:
            self.cancel_print_insertion_speed = self.start_insertion_speed()
            try:
                self._insert_file_rows()
                self.database_connection.commit()
            finally:
                self.cancel_print_insertion_speed()
        finally:
            self.close_database()

        self.logger.info("completed, insert {} rows, {} rows of invalid data".format(
            self.handle_total,
            self.handle_invalid,
        ))
        # Same effect as the site-provided exit() helper, but also works
        # when the interpreter runs without the site module.
        raise SystemExit
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: import the bundled source file into the bundled
    # database; both paths are relative to the working directory.
    scanner = Scanner(
        "database/database.db",
        "source/6.9更新总库.txt",
    )
    scanner.start()
|