You have probably come across a Twitter database dump. If you have downloaded it, you will have noticed that it is split into several files, each holding one part of the database, and that they are quite large.
Below is a script that cleans this data and loads it into a single .db file, so that everything is in one place.
Code:
import sqlite3
import re
from colorama import Fore, Style
# Compiled once at import time instead of being looked up on every call.
_RECORD_PATTERN = re.compile(
    r'Email: (.+?) - Name: (.+?) - ScreenName: (.+?) - Followers: (\d+) - Created At: (.+)'
)


def process_line(line, line_number, cursor, error_log, batch_size):
    """Parse one record line and insert it into the ``datos`` table.

    Args:
        line: Raw text line, expected to look like
            ``Email: ... - Name: ... - ScreenName: ... - Followers: N - Created At: ...``.
        line_number: 1-based position of ``line`` in the input, used only for
            reporting.
        cursor: DB-API cursor on which the ``INSERT`` is executed.
        error_log: Optional sink for lines that do not match the expected
            layout. Anything exposing a callable ``write`` is used; any other
            value (including ``None``) is ignored.
        batch_size: Not used by this function; kept so existing callers keep
            working.

    Returns:
        ``True`` if a row was inserted, ``False`` if the line did not match.
    """
    record = _RECORD_PATTERN.search(line)
    if record is None:
        # Previously unmatched lines vanished without a trace; record them so
        # they can be inspected later. Duck-typed so a non-file value passed
        # by an existing caller cannot start raising here.
        write = getattr(error_log, "write", None)
        if callable(write):
            stripped = line.rstrip("\r\n")
            write(f"Line {line_number}: {stripped}\n")
        return False

    email, name, screen_name, followers, created_at = map(str.strip, record.groups())
    followers = int(followers)  # safe: the pattern only captures digits here
    cursor.execute(
        "INSERT INTO datos (Email, Name, ScreenName, Followers, CreatedAt) VALUES (?, ?, ?, ?, ?)",
        (email, name, screen_name, followers, created_at),
    )
    print(f"{Fore.GREEN}[+] {email}:{name}:{screen_name}:{followers}:{line_number}{Fore.RESET}")
    return True
def main():
db_conn = sqlite3.connect('database.db')
db_cursor = db_conn.cursor()
db_cursor.execute('CREATE TABLE IF NOT EXISTS datos (Email TEXT, Name TEXT, ScreenName TEXT, Followers INTEGER, CreatedAt TEXT)')
with open('FILE', 'r', encoding='utf-8') as file:
batch_size = 1000
batch = []
error_file = open('error_log.txt', 'w', encoding='utf-8')
for line_number, line in enumerate(file, 1):
Below is a script that cleans this data and loads it into a single .db file, so that everything is in one place.
Code:
import sqlite3
import re
from colorama import Fore, Style
# Compiled once at import time instead of being looked up on every call.
_RECORD_PATTERN = re.compile(
    r'Email: (.+?) - Name: (.+?) - ScreenName: (.+?) - Followers: (\d+) - Created At: (.+)'
)


def process_line(line, line_number, cursor, error_log, batch_size):
    """Parse one record line and insert it into the ``datos`` table.

    Args:
        line: Raw text line, expected to look like
            ``Email: ... - Name: ... - ScreenName: ... - Followers: N - Created At: ...``.
        line_number: 1-based position of ``line`` in the input, used only for
            reporting.
        cursor: DB-API cursor on which the ``INSERT`` is executed.
        error_log: Optional sink for lines that do not match the expected
            layout. Anything exposing a callable ``write`` is used; any other
            value (including ``None``) is ignored.
        batch_size: Not used by this function; kept so existing callers keep
            working.

    Returns:
        ``True`` if a row was inserted, ``False`` if the line did not match.
    """
    record = _RECORD_PATTERN.search(line)
    if record is None:
        # Previously unmatched lines vanished without a trace; record them so
        # they can be inspected later. Duck-typed so a non-file value passed
        # by an existing caller cannot start raising here.
        write = getattr(error_log, "write", None)
        if callable(write):
            stripped = line.rstrip("\r\n")
            write(f"Line {line_number}: {stripped}\n")
        return False

    email, name, screen_name, followers, created_at = map(str.strip, record.groups())
    followers = int(followers)  # safe: the pattern only captures digits here
    cursor.execute(
        "INSERT INTO datos (Email, Name, ScreenName, Followers, CreatedAt) VALUES (?, ?, ?, ?, ?)",
        (email, name, screen_name, followers, created_at),
    )
    print(f"{Fore.GREEN}[+] {email}:{name}:{screen_name}:{followers}:{line_number}{Fore.RESET}")
    return True
def main():
db_conn = sqlite3.connect('database.db')
db_cursor = db_conn.cursor()
db_cursor.execute('CREATE TABLE IF NOT EXISTS datos (Email TEXT, Name TEXT, ScreenName TEXT, Followers INTEGER, CreatedAt TEXT)')
with open('FILE', 'r', encoding='utf-8') as file:
batch_size = 1000
batch = []
error_file = open('error_log.txt', 'w', encoding='utf-8')
for line_number, line in enumerate(file, 1):