#!/usr/bin/env python3
"""Load DNS query records from text logs into a SQLite database.

Every ``dns*.log`` file in ``LOG_DIR`` is scanned line by line.  Each line
that matches ``pattern`` yields a (timestamp, source IP, query type, domain)
record, which is inserted into the ``logs`` table of the database at
``DB_PATH``.  Inserts use ``INSERT OR IGNORE`` together with a UNIQUE
constraint over all four columns, so records already present are skipped.
"""
import os
import re
import sqlite3
from contextlib import closing
from datetime import datetime
from typing import Optional, Tuple

# Both paths are relative, so they resolve against the current working
# directory of the process, not against the location of this file.
LOG_DIR = "../logs"
DB_PATH = "../db/dns.sqlite"

# Regular expression that extracts the fields of one query line:
#   group 1 - timestamp in "dd/mm/YYYY HH:MM:SS" form
#   group 2 - IPv4 address following "UDP Rcv"
#   group 3 - query type (the word after the bracketed flags section)
#   group 4 - queried domain, without its trailing dot
# NOTE(review): the separators around the "[...]" section are written as
# \s+ on the assumption that the fields are whitespace-delimited on a single
# line -- confirm against a real log sample.
pattern = re.compile(
    r'(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}).*UDP Rcv (\d+\.\d+\.\d+\.\d+).*Q'
    r'\s+\[.*\]\s+(\w+)\s+([\w\.-]+)\.'
)

_TIMESTAMP_FORMAT = "%d/%m/%Y %H:%M:%S"

_CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS logs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    timestamp TEXT,
    src_ip TEXT,
    qtype TEXT,
    domain TEXT,
    UNIQUE(timestamp, src_ip, qtype, domain)
)
"""

_INSERT_SQL = """
INSERT OR IGNORE INTO logs (timestamp, src_ip, qtype, domain)
VALUES (?, ?, ?, ?)
"""


def parse_line(line: str) -> Optional[Tuple[str, str, str, str]]:
    """Extract one DNS query record from a log line.

    Args:
        line: A single line of log text.

    Returns:
        ``(timestamp, src_ip, qtype, domain)`` with the timestamp converted
        to ISO 8601 text, or ``None`` when the line does not match
        ``pattern``.

    Raises:
        ValueError: The line matched but its timestamp is not a valid
            ``dd/mm/YYYY HH:MM:SS`` date (for example month 13).
    """
    match = pattern.search(line)
    if match is None:
        return None
    raw_ts, src_ip, qtype, domain = match.groups()
    timestamp = datetime.strptime(raw_ts, _TIMESTAMP_FORMAT).isoformat()
    return timestamp, src_ip, qtype, domain


def _create_schema(conn: sqlite3.Connection) -> None:
    """Create the ``logs`` table if the database does not have one yet."""
    # IF NOT EXISTS leaves an already existing ``logs`` table untouched, so
    # a table created earlier without the UNIQUE constraint will not gain it.
    conn.execute(_CREATE_TABLE_SQL)


def _import_log_file(conn: sqlite3.Connection, filepath: str) -> None:
    """Insert every record found in ``filepath``; bad lines are reported."""
    # errors="ignore" drops undecodable bytes instead of aborting the file.
    with open(filepath, encoding="utf-8", errors="ignore") as f:
        for line in f:
            try:
                record = parse_line(line)
                if record is None:
                    continue
                conn.execute(_INSERT_SQL, record)
            except (ValueError, sqlite3.Error) as e:
                # Best effort: report the offending line and keep going.
                print(f"Ошибка при обработке строки: {line.strip()}\n{e}")


def main(log_dir: str = LOG_DIR, db_path: str = DB_PATH) -> None:
    """Parse all ``dns*.log`` files in ``log_dir`` into the DB at ``db_path``.

    Args:
        log_dir: Directory that is searched (non-recursively) for log files.
        db_path: Path of the SQLite database file; created if missing.

    Raises:
        OSError: ``log_dir`` cannot be listed or a log file cannot be opened.
        sqlite3.Error: The database cannot be opened or the schema created.
    """
    # closing() guarantees the connection is released even when a step
    # below raises; uncommitted inserts are then discarded.
    with closing(sqlite3.connect(db_path)) as conn:
        _create_schema(conn)
        # Sorted so that the processing order is reproducible between runs.
        for filename in sorted(os.listdir(log_dir)):
            if filename.startswith("dns") and filename.endswith(".log"):
                filepath = os.path.join(log_dir, filename)
                print(f"Обрабатываю: {filepath}")
                _import_log_file(conn, filepath)
        conn.commit()
    print("✅ Парсинг завершён.")


if __name__ == "__main__":
    main()