Initial commit of local code
config.ini (new file, 50 lines)
@@ -0,0 +1,50 @@
[Dataset]
# Dataset name(s) (folder name). To train and test several datasets at once, separate the names with commas.
# Make sure each dataset is placed under ./dataset in the required format.
name=SWAT

[Method]
# Model name(s) (file name without ".py"). To train and test several algorithms at once, separate the names with commas.
# Make sure each model file is placed under ./model in the required format.
# Model names must consist of letters only.
name=AnomalyTransformer

[Preprocess]
# Preprocessing method name (file name without ".py").
# Make sure the preprocessing file is placed under ./preprocess in the required format.
name=standardization

[Evaluation]
# Evaluation method name(s) (file name without ".py"). To use several evaluation methods, separate the names with commas.
# Make sure each evaluation file is placed under ./evaluation in the required format.
name=f1,f1pa,ftad,affiliation

[ModelPath]
# To load previously saved model parameters, use "ModelName_DatasetName" as the option name.
# Write the path relative to the TSAD folder, or give an absolute path.
MtadGatAtt_SWAT=none

[BaseParameters]
# train: whether to train the model (true or false); when false, the model is only tested.
train=true
# epochs: number of training epochs (a positive integer).
epochs=20
# batch_size: size of each batch (a positive integer).
batch_size=32
# learning_rate: learning rate; values above 0.001 are not recommended.
learning_rate=1e-4
# device: the device to use, one of auto, cpu, cuda. auto picks the GPU when one is available and the CPU otherwise;
# to pick a specific GPU, give its index, e.g. cuda:0.
device=auto

[CustomParameters]
# Any number of custom parameters can be added here for special inputs, e.g. external parameters
# needed by the model file or parameters used to build the dataset.
# input_size: length of the input, i.e. the length of the window cut from the series.
input_size=100
# output_size: length of the output.
output_size=1
# step: stride with which the window moves along the series.
step=1
# Whether to skip the first row of the dataset CSV file (skip it when it holds the column names).
del_column_name=true
# Whether the first column of the dataset CSV file is a timestamp.
time_index=true
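The windowing keys in [CustomParameters] describe how a series is cut into samples. Below is a minimal sketch of that slicing, assuming a 2-D array `data` of shape (time, features); the actual logic lives in the repository's utils module, which is not shown in this section.

import numpy as np

def sliding_windows(data, input_size=100, output_size=1, step=1):
    # Cut overlapping input windows of length input_size; each window is paired
    # with the next output_size points as the prediction target.
    x, y = [], []
    for start in range(0, len(data) - input_size - output_size + 1, step):
        x.append(data[start:start + input_size])
        y.append(data[start + input_size:start + input_size + output_size])
    return np.stack(x), np.stack(y)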
daemon.py (new file, 250 lines)
@@ -0,0 +1,250 @@
|
||||
'''
|
||||
***
|
||||
Modified generic daemon class
|
||||
***
|
||||
|
||||
Author: http://www.jejik.com/articles/2007/02/a_simple_unix_linux_daemon_in_python/
www.boxedice.com
|
||||
|
||||
License: http://creativecommons.org/licenses/by-sa/3.0/
|
||||
|
||||
Changes: 23rd Jan 2009 (David Mytton <david@boxedice.com>)
|
||||
- Replaced hard coded '/dev/null' in __init__ with os.devnull
|
||||
- Added OS check to conditionally remove code that doesn't
|
||||
work on OS X
|
||||
- Added output to console on completion
|
||||
- Tidied up formatting
|
||||
11th Mar 2009 (David Mytton <david@boxedice.com>)
|
||||
- Fixed problem with daemon exiting on Python 2.4
|
||||
(before SystemExit was part of the Exception base)
|
||||
13th Aug 2010 (David Mytton <david@boxedice.com>)
|
||||
- Fixed unhandled exception if PID file is empty
|
||||
'''
|
||||
|
||||
# Core modules
|
||||
from __future__ import print_function
|
||||
import atexit
|
||||
import errno
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import signal
|
||||
|
||||
|
||||
class Daemon(object):
|
||||
"""
|
||||
A generic daemon class.
|
||||
|
||||
Usage: subclass the Daemon class and override the run() method
|
||||
"""
|
||||
def __init__(self, pidfile, stdin=os.devnull,
|
||||
stdout=os.devnull, stderr=os.devnull,
|
||||
home_dir='.', umask=0o22, verbose=1,
|
||||
use_gevent=False, use_eventlet=False):
|
||||
self.stdin = stdin
|
||||
self.stdout = stdout
|
||||
self.stderr = stderr
|
||||
self.pidfile = pidfile
|
||||
self.home_dir = home_dir
|
||||
self.verbose = verbose
|
||||
self.umask = umask
|
||||
self.daemon_alive = True
|
||||
self.use_gevent = use_gevent
|
||||
self.use_eventlet = use_eventlet
|
||||
|
||||
def log(self, *args):
|
||||
if self.verbose >= 1:
|
||||
print(*args)
|
||||
|
||||
def daemonize(self):
|
||||
"""
|
||||
Do the UNIX double-fork magic, see Stevens' "Advanced
|
||||
Programming in the UNIX Environment" for details (ISBN 0201563177)
|
||||
http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
|
||||
"""
|
||||
if self.use_eventlet:
|
||||
import eventlet.tpool
|
||||
eventlet.tpool.killall()
|
||||
try:
|
||||
pid = os.fork()
|
||||
if pid > 0:
|
||||
# Exit first parent
|
||||
sys.exit(0)
|
||||
except OSError as e:
|
||||
sys.stderr.write(
|
||||
"fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
|
||||
sys.exit(1)
|
||||
|
||||
# Decouple from parent environment
|
||||
os.chdir(self.home_dir)
|
||||
os.setsid()
|
||||
os.umask(self.umask)
|
||||
|
||||
# Do second fork
|
||||
try:
|
||||
pid = os.fork()
|
||||
if pid > 0:
|
||||
# Exit from second parent
|
||||
sys.exit(0)
|
||||
except OSError as e:
|
||||
sys.stderr.write(
|
||||
"fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
|
||||
sys.exit(1)
|
||||
|
||||
if sys.platform != 'darwin': # This block breaks on OS X
|
||||
# Redirect standard file descriptors
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
si = open(self.stdin, 'r')
|
||||
so = open(self.stdout, 'a+')
|
||||
if self.stderr:
|
||||
try:
|
||||
se = open(self.stderr, 'a+', 0)
|
||||
except ValueError:
|
||||
# Python 3 can't have unbuffered text I/O
|
||||
se = open(self.stderr, 'a+', 1)
|
||||
else:
|
||||
se = so
|
||||
os.dup2(si.fileno(), sys.stdin.fileno())
|
||||
os.dup2(so.fileno(), sys.stdout.fileno())
|
||||
os.dup2(se.fileno(), sys.stderr.fileno())
|
||||
|
||||
def sigtermhandler(signum, frame):
|
||||
self.daemon_alive = False
|
||||
sys.exit()
|
||||
|
||||
if self.use_gevent:
|
||||
import gevent
|
||||
gevent.reinit()
|
||||
gevent.signal(signal.SIGTERM, sigtermhandler, signal.SIGTERM, None)
|
||||
gevent.signal(signal.SIGINT, sigtermhandler, signal.SIGINT, None)
|
||||
else:
|
||||
signal.signal(signal.SIGTERM, sigtermhandler)
|
||||
signal.signal(signal.SIGINT, sigtermhandler)
|
||||
|
||||
self.log("Started")
|
||||
|
||||
# Write pidfile
|
||||
atexit.register(
|
||||
self.delpid) # Make sure pid file is removed if we quit
|
||||
pid = str(os.getpid())
|
||||
open(self.pidfile, 'w+').write("%s\n" % pid)
|
||||
|
||||
def delpid(self):
|
||||
try:
|
||||
# the process may fork itself again
|
||||
pid = int(open(self.pidfile, 'r').read().strip())
|
||||
if pid == os.getpid():
|
||||
os.remove(self.pidfile)
|
||||
except OSError as e:
|
||||
if e.errno == errno.ENOENT:
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
|
||||
def start(self, *args, **kwargs):
|
||||
"""
|
||||
Start the daemon
|
||||
"""
|
||||
|
||||
self.log("Starting...")
|
||||
|
||||
# Check for a pidfile to see if the daemon already runs
|
||||
try:
|
||||
pf = open(self.pidfile, 'r')
|
||||
pid = int(pf.read().strip())
|
||||
pf.close()
|
||||
except IOError:
|
||||
pid = None
|
||||
except SystemExit:
|
||||
pid = None
|
||||
|
||||
if pid:
|
||||
message = "pidfile %s already exists. Is it already running?\n"
|
||||
sys.stderr.write(message % self.pidfile)
|
||||
sys.exit(1)
|
||||
|
||||
# Start the daemon
|
||||
self.daemonize()
|
||||
self.run(*args, **kwargs)
|
||||
|
||||
def stop(self):
|
||||
"""
|
||||
Stop the daemon
|
||||
"""
|
||||
|
||||
if self.verbose >= 1:
|
||||
self.log("Stopping...")
|
||||
|
||||
# Get the pid from the pidfile
|
||||
pid = self.get_pid()
|
||||
|
||||
if not pid:
|
||||
message = "pidfile %s does not exist. Not running?\n"
|
||||
sys.stderr.write(message % self.pidfile)
|
||||
|
||||
# Just to be sure. A ValueError might occur if the PID file is
|
||||
# empty but does actually exist
|
||||
if os.path.exists(self.pidfile):
|
||||
os.remove(self.pidfile)
|
||||
|
||||
sys.exit(0) # Not an error in a restart
|
||||
|
||||
# Try killing the daemon process
|
||||
try:
|
||||
i = 0
|
||||
while 1:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
time.sleep(0.1)
|
||||
i = i + 1
|
||||
if i % 10 == 0:
|
||||
os.kill(pid, signal.SIGHUP)
|
||||
except OSError as err:
|
||||
if err.errno == errno.ESRCH:
|
||||
if os.path.exists(self.pidfile):
|
||||
os.remove(self.pidfile)
|
||||
else:
|
||||
print(str(err))
|
||||
sys.exit(1)
|
||||
|
||||
self.log("Stopped")
|
||||
|
||||
|
||||
def restart(self):
|
||||
"""
|
||||
Restart the daemon
|
||||
"""
|
||||
self.stop()
|
||||
self.start()
|
||||
|
||||
def get_pid(self):
|
||||
try:
|
||||
pf = open(self.pidfile, 'r')
|
||||
pid = int(pf.read().strip())
|
||||
pf.close()
|
||||
except IOError:
|
||||
pid = None
|
||||
except SystemExit:
|
||||
pid = None
|
||||
return pid
|
||||
|
||||
def is_running(self):
|
||||
pid = self.get_pid()
|
||||
|
||||
if pid is None:
|
||||
self.log('Process is stopped')
|
||||
return False
|
||||
elif os.path.exists('/proc/%d' % pid):
|
||||
self.log('Process (pid %d) is running...' % pid)
|
||||
return True
|
||||
else:
|
||||
self.log('Process (pid %d) is killed' % pid)
|
||||
return False
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
You should override this method when you subclass Daemon.
|
||||
It will be called after the process has been
|
||||
daemonized by start() or restart().
|
||||
"""
|
||||
raise NotImplementedError
|
||||
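A minimal usage sketch (not part of this commit): subclass Daemon and override run(), as the class docstring describes. The pid-file path and the heartbeat payload below are made up.

from daemon import Daemon
import time


class HeartbeatDaemon(Daemon):
    def run(self):
        # Write a timestamp every few seconds until SIGTERM flips daemon_alive.
        while self.daemon_alive:
            with open('/tmp/heartbeat.log', 'a') as f:
                f.write('alive %s\n' % time.ctime())
            time.sleep(5)


if __name__ == '__main__':
    HeartbeatDaemon('/tmp/heartbeat.pid').start()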
evaluation/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
from .ftad import evaluate
from .f1pa import evaluate
from .affiliation import evaluate
from .f1 import evaluate
evaluation/affiliation.py (new file, 23 lines)
@@ -0,0 +1,23 @@
from .affiliation_bin.generics import convert_vector_to_events
from .affiliation_bin.metrics import pr_from_events


def evaluate(y_true: list, y_pred: list) -> float:
    """
    Affiliation evaluation method: the label vectors are converted to events,
    affiliation precision and recall are computed, and the two are combined into an F1 score.
    :param y_true: ground truth labels
    :param y_pred: detected labels
    :return: affiliation F1 score
    """
    true, pred = y_true.copy(), y_pred.copy()
    events_pred = convert_vector_to_events(pred)
    events_gt = convert_vector_to_events(true)
    Trange = (0, len(pred))

    res = pr_from_events(events_pred, events_gt, Trange)
    aff_precision = res["precision"]
    aff_recall = res["recall"]
    if aff_recall == 0 or aff_precision == 0:
        return 0
    aff_f1 = 2 * aff_precision * aff_recall / (aff_precision + aff_recall)
    return aff_f1
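A usage sketch (not part of the committed file); the inputs are hypothetical binary label vectors of equal length:

from evaluation.affiliation import evaluate

y_true = [0, 1, 1, 0, 0, 1, 1, 0]  # two ground-truth anomaly segments
y_pred = [0, 0, 1, 1, 0, 1, 0, 0]  # detections overlapping both segments
score = evaluate(y_true, y_pred)   # affiliation F1, a float in [0, 1]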
evaluation/affiliation_bin/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .metrics import pr_from_events
evaluation/affiliation_bin/_affiliation_zone.py (new file, 91 lines)
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from ._integral_interval import interval_intersection
|
||||
|
||||
|
||||
def t_start(j, Js=[(1, 2), (3, 4), (5, 6)], Trange=(1, 10)):
|
||||
"""
|
||||
Helper for `E_gt_func`
|
||||
|
||||
:param j: index from 0 to len(Js) (included) on which to get the start
|
||||
:param Js: ground truth events, as a list of couples
|
||||
:param Trange: range of the series where Js is included
|
||||
:return: generalized start such that the middle of t_start and t_stop
|
||||
always gives the affiliation_bin zone
|
||||
"""
|
||||
b = max(Trange)
|
||||
n = len(Js)
|
||||
if j == n:
|
||||
return (2 * b - t_stop(n - 1, Js, Trange))
|
||||
else:
|
||||
return (Js[j][0])
|
||||
|
||||
|
||||
def t_stop(j, Js=[(1, 2), (3, 4), (5, 6)], Trange=(1, 10)):
|
||||
"""
|
||||
Helper for `E_gt_func`
|
||||
|
||||
:param j: index from 0 to len(Js) (included) on which to get the stop
|
||||
:param Js: ground truth events, as a list of couples
|
||||
:param Trange: range of the series where Js is included
|
||||
:return: generalized stop such that the middle of t_start and t_stop
|
||||
always gives the affiliation_bin zone
|
||||
"""
|
||||
if j == -1:
|
||||
a = min(Trange)
|
||||
return (2 * a - t_start(0, Js, Trange))
|
||||
else:
|
||||
return (Js[j][1])
|
||||
|
||||
|
||||
def E_gt_func(j, Js, Trange):
|
||||
"""
|
||||
Get the affiliation_bin zone of element j of the ground truth
|
||||
|
||||
:param j: index from 0 to len(Js) (excluded) on which to get the zone
|
||||
:param Js: ground truth events, as a list of couples
|
||||
:param Trange: range of the series where Js is included, can
|
||||
be (-math.inf, math.inf) for distance measures
|
||||
:return: affiliation_bin zone of element j of the ground truth represented
|
||||
as a couple
|
||||
"""
|
||||
range_left = (t_stop(j - 1, Js, Trange) + t_start(j, Js, Trange)) / 2
|
||||
range_right = (t_stop(j, Js, Trange) + t_start(j + 1, Js, Trange)) / 2
|
||||
return ((range_left, range_right))
|
||||
|
||||
|
||||
def get_all_E_gt_func(Js, Trange):
|
||||
"""
|
||||
Get the affiliation_bin partition from the ground truth point of view
|
||||
|
||||
:param Js: ground truth events, as a list of couples
|
||||
:param Trange: range of the series where Js is included, can
|
||||
be (-math.inf, math.inf) for distance measures
|
||||
:return: affiliation_bin partition of the events
|
||||
"""
|
||||
# E_gt is the limit of affiliation_bin/attraction for each ground truth event
|
||||
E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
|
||||
return (E_gt)
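# Worked example (illustrative, not part of the original file): for Js = [(3, 5), (8, 9)]
# and Trange = (0, 12), the generalized endpoints are t_stop(-1) = -3 and t_start(2) = 15,
# so get_all_E_gt_func(Js, Trange) == [(0.0, 6.5), (6.5, 12.0)]: each ground truth event
# attracts the half of the series that is closer to it than to its neighbour.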
|
||||
|
||||
|
||||
def affiliation_partition(Is=[(1, 1.5), (2, 5), (5, 6), (8, 9)], E_gt=[(1, 2.5), (2.5, 4.5), (4.5, 10)]):
|
||||
"""
|
||||
Cut the events into the affiliation_bin zones
|
||||
The presentation given here is from the ground truth point of view,
|
||||
but it is also used in the reversed direction in the main function.
|
||||
|
||||
:param Is: events as a list of couples
|
||||
:param E_gt: range of the affiliation_bin zones
|
||||
:return: a list of list of intervals (each interval represented by either
|
||||
a couple or None for empty interval). The outer list is indexed by each
|
||||
affiliation_bin zone of `E_gt`. The inner list is indexed by the events of `Is`.
|
||||
"""
|
||||
out = [None] * len(E_gt)
|
||||
for j in range(len(E_gt)):
|
||||
E_gt_j = E_gt[j]
|
||||
discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E
|
||||
discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E
|
||||
kept_index = [not (a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)]
|
||||
Is_j = [x for x, y in zip(Is, kept_index) if y]  # keep only the predicted events that overlap this zone
|
||||
out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j]
|
||||
return (out)
|
||||
evaluation/affiliation_bin/_integral_interval.py (new file, 493 lines)
@@ -0,0 +1,493 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import math
|
||||
from .generics import _sum_wo_nan
|
||||
|
||||
"""
|
||||
In order to shorten the length of the variables,
|
||||
the general convention in this file is to let:
|
||||
- I for a predicted event (start, stop),
|
||||
- Is for a list of predicted events,
|
||||
- J for a ground truth event,
|
||||
- Js for a list of ground truth events.
|
||||
"""
|
||||
|
||||
|
||||
def interval_length(J=(1, 2)):
|
||||
"""
|
||||
Length of an interval
|
||||
|
||||
:param J: couple representing the start and stop of an interval, or None
|
||||
:return: length of the interval, and 0 for a None interval
|
||||
"""
|
||||
if J is None:
|
||||
return (0)
|
||||
return (J[1] - J[0])
|
||||
|
||||
|
||||
def sum_interval_lengths(Is=[(1, 2), (3, 4), (5, 6)]):
|
||||
"""
|
||||
Sum of length of the intervals
|
||||
|
||||
:param Is: list of intervals represented by starts and stops
|
||||
:return: sum of the interval length
|
||||
"""
|
||||
return (sum([interval_length(I) for I in Is]))
|
||||
|
||||
|
||||
def interval_intersection(I=(1, 3), J=(2, 4)):
|
||||
"""
|
||||
Intersection between two intervals I and J
|
||||
I and J should be either empty or represent a positive interval (no point)
|
||||
|
||||
:param I: an interval represented by start and stop
|
||||
:param J: a second interval of the same form
|
||||
:return: an interval representing the start and stop of the intersection (or None if empty)
|
||||
"""
|
||||
if I is None:
|
||||
return (None)
|
||||
if J is None:
|
||||
return (None)
|
||||
|
||||
I_inter_J = (max(I[0], J[0]), min(I[1], J[1]))
|
||||
if I_inter_J[0] >= I_inter_J[1]:
|
||||
return (None)
|
||||
else:
|
||||
return (I_inter_J)
|
||||
|
||||
|
||||
def interval_subset(I=(1, 3), J=(0, 6)):
|
||||
"""
|
||||
Checks whether I is a subset of J
|
||||
|
||||
:param I: a non-empty interval represented by start and stop
|
||||
:param J: a second non empty interval of the same form
|
||||
:return: True if I is a subset of J
|
||||
"""
|
||||
if (I[0] >= J[0]) and (I[1] <= J[1]):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def cut_into_three_func(I, J):
|
||||
"""
|
||||
Cut an interval I into a partition of 3 subsets:
|
||||
the elements before J,
|
||||
the elements belonging to J,
|
||||
and the elements after J
|
||||
|
||||
:param I: an interval represented by start and stop, or None for an empty one
|
||||
:param J: a non empty interval
|
||||
:return: a triplet of three intervals, each represented by either (start, stop) or None
|
||||
"""
|
||||
if I is None:
|
||||
return ((None, None, None))
|
||||
|
||||
I_inter_J = interval_intersection(I, J)
|
||||
if I == I_inter_J:
|
||||
I_before = None
|
||||
I_after = None
|
||||
elif I[1] <= J[0]:
|
||||
I_before = I
|
||||
I_after = None
|
||||
elif I[0] >= J[1]:
|
||||
I_before = None
|
||||
I_after = I
|
||||
elif (I[0] <= J[0]) and (I[1] >= J[1]):
|
||||
I_before = (I[0], I_inter_J[0])
|
||||
I_after = (I_inter_J[1], I[1])
|
||||
elif I[0] <= J[0]:
|
||||
I_before = (I[0], I_inter_J[0])
|
||||
I_after = None
|
||||
elif I[1] >= J[1]:
|
||||
I_before = None
|
||||
I_after = (I_inter_J[1], I[1])
|
||||
else:
|
||||
raise ValueError('unexpected unconsidered case')
|
||||
return (I_before, I_inter_J, I_after)
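# Illustrative example (not in the original file): cut_into_three_func((1, 6), (3, 4))
# returns ((1, 3), (3, 4), (4, 6)): the part of I before J, the overlap with J,
# and the part of I after J.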
|
||||
|
||||
|
||||
def get_pivot_j(I, J):
|
||||
"""
|
||||
Get the single point of J that is the closest to I, called 'pivot' here,
|
||||
with the requirement that I should be outside J
|
||||
|
||||
:param I: a non empty interval (start, stop)
|
||||
:param J: another non empty interval, with empty intersection with I
|
||||
:return: the element j of J that is the closest to I
|
||||
"""
|
||||
if interval_intersection(I, J) is not None:
|
||||
raise ValueError('I and J should have a void intersection')
|
||||
|
||||
j_pivot = None # j_pivot is a border of J
|
||||
if max(I) <= min(J):
|
||||
j_pivot = min(J)
|
||||
elif min(I) >= max(J):
|
||||
j_pivot = max(J)
|
||||
else:
|
||||
raise ValueError('I should be outside J')
|
||||
return (j_pivot)
|
||||
|
||||
|
||||
def integral_mini_interval(I, J):
|
||||
"""
|
||||
In the specific case where interval I is located outside J,
|
||||
integral of distance from x to J over the interval x \in I.
|
||||
This is the *integral* i.e. the sum.
|
||||
It's not the mean (not divided by the length of I yet)
|
||||
|
||||
:param I: an interval (start, stop), or None
|
||||
:param J: a non empty interval, with empty intersection with I
|
||||
:return: the integral of distances d(x, J) over x \in I
|
||||
"""
|
||||
if I is None:
|
||||
return (0)
|
||||
|
||||
j_pivot = get_pivot_j(I, J)
|
||||
a = min(I)
|
||||
b = max(I)
|
||||
return ((b - a) * abs((j_pivot - (a + b) / 2)))
|
||||
|
||||
|
||||
def integral_interval_distance(I, J):
|
||||
"""
|
||||
For any non empty intervals I, J, compute the
|
||||
integral of distance from x to J over the interval x \in I.
|
||||
This is the *integral* i.e. the sum.
|
||||
It's not the mean (not divided by the length of I yet)
|
||||
The interval I can intersect J or not
|
||||
|
||||
:param I: an interval (start, stop), or None
|
||||
:param J: a non empty interval
|
||||
:return: the integral of distances d(x, J) over x \in I
|
||||
"""
|
||||
|
||||
# I and J are single intervals (not generic sets)
|
||||
# I is a predicted interval in the range of affiliation_bin of J
|
||||
|
||||
def f(I_cut):
|
||||
return (integral_mini_interval(I_cut, J))
|
||||
|
||||
# If I_middle is fully included in J,
# the distance to J is always 0
|
||||
def f0(I_middle):
|
||||
return (0)
|
||||
|
||||
cut_into_three = cut_into_three_func(I, J)
|
||||
# Distance for now, not the mean:
|
||||
# Distance left: Between cut_into_three[0] and the point min(J)
|
||||
d_left = f(cut_into_three[0])
|
||||
# Distance middle: Between cut_into_three[1] = I inter J, and J
|
||||
d_middle = f0(cut_into_three[1])
|
||||
# Distance right: Between cut_into_three[2] and the point max(J)
|
||||
d_right = f(cut_into_three[2])
|
||||
# It's an integral so summable
|
||||
return (d_left + d_middle + d_right)
|
||||
|
||||
|
||||
def integral_mini_interval_P_CDFmethod__min_piece(I, J, E):
|
||||
"""
|
||||
Helper of `integral_mini_interval_Pprecision_CDFmethod`
|
||||
In the specific case where interval I is located outside J,
|
||||
compute the integral $\int_{d_min}^{d_max} \min(m, x) dx$, with:
|
||||
- m the smallest distance from J to E,
|
||||
- d_min the smallest distance d(x, J) from x \in I to J
|
||||
- d_max the largest distance d(x, J) from x \in I to J
|
||||
|
||||
:param I: a single predicted interval, a non empty interval (start, stop)
|
||||
:param J: ground truth interval, a non empty interval, with empty intersection with I
|
||||
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
|
||||
:return: the integral $\int_{d_min}^{d_max} \min(m, x) dx$
|
||||
"""
|
||||
if interval_intersection(I, J) is not None:
|
||||
raise ValueError('I and J should have a void intersection')
|
||||
if not interval_subset(J, E):
|
||||
raise ValueError('J should be included in E')
|
||||
if not interval_subset(I, E):
|
||||
raise ValueError('I should be included in E')
|
||||
|
||||
e_min = min(E)
|
||||
j_min = min(J)
|
||||
j_max = max(J)
|
||||
e_max = max(E)
|
||||
i_min = min(I)
|
||||
i_max = max(I)
|
||||
|
||||
d_min = max(i_min - j_max, j_min - i_max)
|
||||
d_max = max(i_max - j_max, j_min - i_min)
|
||||
m = min(j_min - e_min, e_max - j_max)
|
||||
A = min(d_max, m) ** 2 - min(d_min, m) ** 2
|
||||
B = max(d_max, m) - max(d_min, m)
|
||||
C = (1 / 2) * A + m * B
|
||||
return (C)
|
||||
|
||||
|
||||
def integral_mini_interval_Pprecision_CDFmethod(I, J, E):
|
||||
"""
|
||||
Integral of the probability of distances over the interval I.
|
||||
In the specific case where interval I is located outside J,
|
||||
compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
|
||||
This is the *integral* i.e. the sum (not the mean)
|
||||
|
||||
:param I: a single predicted interval, a non empty interval (start, stop)
|
||||
:param J: ground truth interval, a non empty interval, with empty intersection with I
|
||||
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
|
||||
:return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
|
||||
"""
|
||||
integral_min_piece = integral_mini_interval_P_CDFmethod__min_piece(I, J, E)
|
||||
|
||||
e_min = min(E)
|
||||
j_min = min(J)
|
||||
j_max = max(J)
|
||||
e_max = max(E)
|
||||
i_min = min(I)
|
||||
i_max = max(I)
|
||||
d_min = max(i_min - j_max, j_min - i_max)
|
||||
d_max = max(i_max - j_max, j_min - i_min)
|
||||
integral_linear_piece = (1 / 2) * (d_max ** 2 - d_min ** 2)
|
||||
integral_remaining_piece = (j_max - j_min) * (i_max - i_min)
|
||||
|
||||
DeltaI = i_max - i_min
|
||||
DeltaE = e_max - e_min
|
||||
|
||||
output = DeltaI - (1 / DeltaE) * (integral_min_piece + integral_linear_piece + integral_remaining_piece)
|
||||
return (output)
|
||||
|
||||
|
||||
def integral_interval_probaCDF_precision(I, J, E):
|
||||
"""
|
||||
Integral of the probability of distances over the interval I.
|
||||
Compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
|
||||
This is the *integral* i.e. the sum (not the mean)
|
||||
|
||||
:param I: a single (non empty) predicted interval in the zone of affiliation_bin of J
|
||||
:param J: ground truth interval
|
||||
:param E: affiliation_bin/influence zone for J
|
||||
:return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
|
||||
"""
|
||||
|
||||
# I and J are single intervals (not generic sets)
|
||||
def f(I_cut):
|
||||
if I_cut is None:
|
||||
return (0)
|
||||
else:
|
||||
return (integral_mini_interval_Pprecision_CDFmethod(I_cut, J, E))
|
||||
|
||||
# If I_middle is fully included into J, it is
|
||||
# integral of 1 on the interval I_middle, so it's |I_middle|
|
||||
def f0(I_middle):
|
||||
if I_middle is None:
|
||||
return (0)
|
||||
else:
|
||||
return (max(I_middle) - min(I_middle))
|
||||
|
||||
cut_into_three = cut_into_three_func(I, J)
|
||||
# Distance for now, not the mean:
|
||||
# Distance left: Between cut_into_three[0] and the point min(J)
|
||||
d_left = f(cut_into_three[0])
|
||||
# Distance middle: Between cut_into_three[1] = I inter J, and J
|
||||
d_middle = f0(cut_into_three[1])
|
||||
# Distance right: Between cut_into_three[2] and the point max(J)
|
||||
d_right = f(cut_into_three[2])
|
||||
# It's an integral so summable
|
||||
return (d_left + d_middle + d_right)
|
||||
|
||||
|
||||
def cut_J_based_on_mean_func(J, e_mean):
|
||||
"""
|
||||
Helper function for the recall.
|
||||
Partition J into two intervals: before and after e_mean
|
||||
(e_mean represents the center element of E the zone of affiliation_bin)
|
||||
|
||||
:param J: ground truth interval
|
||||
:param e_mean: a float number (center value of E)
|
||||
:return: a couple partitioning J into (J_before, J_after)
|
||||
"""
|
||||
if J is None:
|
||||
J_before = None
|
||||
J_after = None
|
||||
elif e_mean >= max(J):
|
||||
J_before = J
|
||||
J_after = None
|
||||
elif e_mean <= min(J):
|
||||
J_before = None
|
||||
J_after = J
|
||||
else: # e_mean is across J
|
||||
J_before = (min(J), e_mean)
|
||||
J_after = (e_mean, max(J))
|
||||
|
||||
return ((J_before, J_after))
|
||||
|
||||
|
||||
def integral_mini_interval_Precall_CDFmethod(I, J, E):
|
||||
"""
|
||||
Integral of the probability of distances over the interval J.
|
||||
In the specific case where interval J is located outside I,
|
||||
compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
|
||||
This is the *integral* i.e. the sum (not the mean)
|
||||
|
||||
:param I: a single (non empty) predicted interval
|
||||
:param J: ground truth (non empty) interval, with empty intersection with I
|
||||
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
|
||||
:return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
|
||||
"""
|
||||
# The interval J should be located outside I
|
||||
# (so it's either the left piece or the right piece w.r.t I)
|
||||
i_pivot = get_pivot_j(J, I)
|
||||
e_min = min(E)
|
||||
e_max = max(E)
|
||||
e_mean = (e_min + e_max) / 2
|
||||
|
||||
# If i_pivot is outside E (it's possible), then
|
||||
# the distance is worse than for any random element within E,
|
||||
# so we set the recall to 0
|
||||
if i_pivot <= min(E):
|
||||
return (0)
|
||||
elif i_pivot >= max(E):
|
||||
return (0)
|
||||
# Otherwise, we have at least i_pivot in E and so d < M so min(d,M)=d
|
||||
|
||||
cut_J_based_on_e_mean = cut_J_based_on_mean_func(J, e_mean)
|
||||
J_before = cut_J_based_on_e_mean[0]
|
||||
J_after = cut_J_based_on_e_mean[1]
|
||||
|
||||
iemin_mean = (e_min + i_pivot) / 2
|
||||
cut_Jbefore_based_on_iemin_mean = cut_J_based_on_mean_func(J_before, iemin_mean)
|
||||
J_before_closeE = cut_Jbefore_based_on_iemin_mean[
|
||||
0] # before e_mean and closer to e_min than i_pivot ~ J_before_before
|
||||
J_before_closeI = cut_Jbefore_based_on_iemin_mean[
|
||||
1] # before e_mean and closer to i_pivot than e_min ~ J_before_after
|
||||
|
||||
iemax_mean = (e_max + i_pivot) / 2
|
||||
cut_Jafter_based_on_iemax_mean = cut_J_based_on_mean_func(J_after, iemax_mean)
|
||||
J_after_closeI = cut_Jafter_based_on_iemax_mean[0] # after e_mean and closer to i_pivot than e_max ~ J_after_before
|
||||
J_after_closeE = cut_Jafter_based_on_iemax_mean[1] # after e_mean and closer to e_max than i_pivot ~ J_after_after
|
||||
|
||||
if J_before_closeE is not None:
|
||||
j_before_before_min = min(J_before_closeE) # == min(J)
|
||||
j_before_before_max = max(J_before_closeE)
|
||||
else:
|
||||
j_before_before_min = math.nan
|
||||
j_before_before_max = math.nan
|
||||
|
||||
if J_before_closeI is not None:
|
||||
j_before_after_min = min(J_before_closeI) # == j_before_before_max if existing
|
||||
j_before_after_max = max(J_before_closeI) # == max(J_before)
|
||||
else:
|
||||
j_before_after_min = math.nan
|
||||
j_before_after_max = math.nan
|
||||
|
||||
if J_after_closeI is not None:
|
||||
j_after_before_min = min(J_after_closeI) # == min(J_after)
|
||||
j_after_before_max = max(J_after_closeI)
|
||||
else:
|
||||
j_after_before_min = math.nan
|
||||
j_after_before_max = math.nan
|
||||
|
||||
if J_after_closeE is not None:
|
||||
j_after_after_min = min(J_after_closeE) # == j_after_before_max if existing
|
||||
j_after_after_max = max(J_after_closeE) # == max(J)
|
||||
else:
|
||||
j_after_after_min = math.nan
|
||||
j_after_after_max = math.nan
|
||||
|
||||
# <-- J_before_closeE --> <-- J_before_closeI --> <-- J_after_closeI --> <-- J_after_closeE -->
|
||||
# j_bb_min j_bb_max j_ba_min j_ba_max j_ab_min j_ab_max j_aa_min j_aa_max
|
||||
# (with `b` for before and `a` for after in the previous variable names)
|
||||
|
||||
# vs e_mean m = min(t-e_min, e_max-t) d=|i_pivot-t| min(d,m) \int min(d,m)dt \int d dt \int_(min(d,m)+d)dt \int_{t \in J}(min(d,m)+d)dt
|
||||
# Case J_before_closeE & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = t-e_min t^2/2-e_min*t i_pivot*t-t^2/2 t^2/2-e_min*t+i_pivot*t-t^2/2 = (i_pivot-e_min)*t (i_pivot-e_min)*tB - (i_pivot-e_min)*tA = (i_pivot-e_min)*(tB-tA)
|
||||
# Case J_before_closeI & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
|
||||
# Case J_after_closeI & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
|
||||
# Case J_after_closeE & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = e_max-t e_max*t-t^2/2 i_pivot*t-t^2/2 e_max*t-t^2/2+i_pivot*t-t^2/2 = (e_max+i_pivot)*t-t^2 (e_max+i_pivot)*tB-tB^2 - (e_max+i_pivot)*tA + tA^2 = (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2)
|
||||
#
|
||||
# Case J_before_closeE & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-e_min t^2/2-e_min*t t^2/2-i_pivot*t t^2/2-e_min*t+t^2/2-i_pivot*t = t^2-(e_min+i_pivot)*t tB^2-(e_min+i_pivot)*tB - tA^2 + (e_min+i_pivot)*tA = (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA)
|
||||
# Case J_before_closeI & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
|
||||
# Case J_after_closeI & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
|
||||
# Case J_after_closeE & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = e_max-t e_max*t-t^2/2 t^2/2-i_pivot*t e_max*t-t^2/2+t^2/2-i_pivot*t = (e_max-i_pivot)*t (e_max-i_pivot)*tB - (e_max-i_pivot)*tA = (e_max-i_pivot)*(tB-tA)
|
||||
|
||||
if i_pivot >= max(J):
|
||||
part1_before_closeE = (i_pivot - e_min) * (
|
||||
j_before_before_max - j_before_before_min) # (i_pivot-e_min)*(tB-tA) # j_before_before_max - j_before_before_min
|
||||
part2_before_closeI = 2 * i_pivot * (j_before_after_max - j_before_after_min) - (
|
||||
j_before_after_max ** 2 - j_before_after_min ** 2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_before_after_max - j_before_after_min
|
||||
part3_after_closeI = 2 * i_pivot * (j_after_before_max - j_after_before_min) - (
|
||||
j_after_before_max ** 2 - j_after_before_min ** 2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_after_before_max - j_after_before_min
|
||||
part4_after_closeE = (e_max + i_pivot) * (j_after_after_max - j_after_after_min) - (
|
||||
j_after_after_max ** 2 - j_after_after_min ** 2) # (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # j_after_after_max - j_after_after_min
|
||||
out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
|
||||
elif i_pivot <= min(J):
|
||||
part1_before_closeE = (j_before_before_max ** 2 - j_before_before_min ** 2) - (e_min + i_pivot) * (
|
||||
j_before_before_max - j_before_before_min) # (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # j_before_before_max - j_before_before_min
|
||||
part2_before_closeI = (j_before_after_max ** 2 - j_before_after_min ** 2) - 2 * i_pivot * (
|
||||
j_before_after_max - j_before_after_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_before_after_max - j_before_after_min
|
||||
part3_after_closeI = (j_after_before_max ** 2 - j_after_before_min ** 2) - 2 * i_pivot * (
|
||||
j_after_before_max - j_after_before_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_after_before_max - j_after_before_min
|
||||
part4_after_closeE = (e_max - i_pivot) * (
|
||||
j_after_after_max - j_after_after_min) # (e_max-i_pivot)*(tB-tA) # j_after_after_max - j_after_after_min
|
||||
out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
|
||||
else:
|
||||
raise ValueError('The i_pivot should be outside J')
|
||||
|
||||
out_integral_min_dm_plus_d = _sum_wo_nan(out_parts) # integral on all J, i.e. sum of the disjoint parts
|
||||
|
||||
# We have for each point t of J:
|
||||
# \bar{F}_{t, recall}(d) = 1 - (1/|E|) * (min(d,m) + d)
|
||||
# Since t is a single-point here, and we are in the case where i_pivot is inside E.
|
||||
# The integral is then given by:
|
||||
# C = \int_{t \in J} \bar{F}_{t, recall}(D(t)) dt
|
||||
# = \int_{t \in J} 1 - (1/|E|) * (min(d,m) + d) dt
|
||||
# = |J| - (1/|E|) * [\int_{t \in J} (min(d,m) + d) dt]
|
||||
# = |J| - (1/|E|) * out_integral_min_dm_plus_d
|
||||
DeltaJ = max(J) - min(J)
|
||||
DeltaE = max(E) - min(E)
|
||||
C = DeltaJ - (1 / DeltaE) * out_integral_min_dm_plus_d
|
||||
|
||||
return (C)
|
||||
|
||||
|
||||
def integral_interval_probaCDF_recall(I, J, E):
|
||||
"""
|
||||
Integral of the probability of distances over the interval J.
|
||||
Compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
|
||||
This is the *integral* i.e. the sum (not the mean)
|
||||
|
||||
:param I: a single (non empty) predicted interval
|
||||
:param J: ground truth (non empty) interval
|
||||
:param E: the affiliation_bin/influence zone for J
|
||||
:return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
|
||||
"""
|
||||
|
||||
# I and J are single intervals (not generic sets)
|
||||
# E is the outside affiliation_bin interval of J (even for recall!)
|
||||
# (in particular J \subset E)
|
||||
#
|
||||
# J is the portion of the ground truth affiliated to I
|
||||
# I is a predicted interval (can be outside E possibly since it's recall)
|
||||
def f(J_cut):
|
||||
if J_cut is None:
|
||||
return (0)
|
||||
else:
|
||||
return integral_mini_interval_Precall_CDFmethod(I, J_cut, E)
|
||||
|
||||
# If J_middle is fully included into I, it is
|
||||
# integral of 1 on the interval J_middle, so it's |J_middle|
|
||||
def f0(J_middle):
|
||||
if J_middle is None:
|
||||
return (0)
|
||||
else:
|
||||
return (max(J_middle) - min(J_middle))
|
||||
|
||||
cut_into_three = cut_into_three_func(J, I) # it's J that we cut into 3, depending on the position w.r.t I
|
||||
# since we integrate over J this time.
|
||||
#
|
||||
# Distance for now, not the mean:
|
||||
# Distance left: Between cut_into_three[0] and the point min(I)
|
||||
d_left = f(cut_into_three[0])
|
||||
# Distance middle: Between cut_into_three[1] = J inter I, and I
|
||||
d_middle = f0(cut_into_three[1])
|
||||
# Distance right: Between cut_into_three[2] and the point max(I)
|
||||
d_right = f(cut_into_three[2])
|
||||
# It's an integral so summable
|
||||
return (d_left + d_middle + d_right)
|
||||
evaluation/affiliation_bin/_single_ground_truth_event.py (new file, 72 lines)
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import math
|
||||
from ._affiliation_zone import (
|
||||
get_all_E_gt_func,
|
||||
affiliation_partition)
|
||||
from ._integral_interval import (
|
||||
integral_interval_distance,
|
||||
integral_interval_probaCDF_precision,
|
||||
integral_interval_probaCDF_recall,
|
||||
interval_length,
|
||||
sum_interval_lengths)
|
||||
|
||||
|
||||
def affiliation_precision_distance(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5)):
|
||||
"""
|
||||
Compute the individual average distance from Is to a single ground truth J
|
||||
|
||||
:param Is: list of predicted events within the affiliation_bin zone of J
|
||||
:param J: couple representing the start and stop of a ground truth interval
|
||||
:return: individual average precision directed distance number
|
||||
"""
|
||||
if all([I is None for I in Is]): # no prediction in the current area
|
||||
return (math.nan) # undefined
|
||||
return (sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is))
|
||||
|
||||
|
||||
def affiliation_precision_proba(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5), E=(0, 8)):
|
||||
"""
|
||||
Compute the individual precision probability from Is to a single ground truth J
|
||||
|
||||
:param Is: list of predicted events within the affiliation_bin zone of J
|
||||
:param J: couple representing the start and stop of a ground truth interval
|
||||
:param E: couple representing the start and stop of the zone of affiliation_bin of J
|
||||
:return: individual precision probability in [0, 1], or math.nan if undefined
|
||||
"""
|
||||
if all([I is None for I in Is]): # no prediction in the current area
|
||||
return (math.nan) # undefined
|
||||
return (sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is))
|
||||
|
||||
|
||||
def affiliation_recall_distance(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5)):
|
||||
"""
|
||||
Compute the individual average distance from a single J to the predictions Is
|
||||
|
||||
:param Is: list of predicted events within the affiliation_bin zone of J
|
||||
:param J: couple representing the start and stop of a ground truth interval
|
||||
:return: individual average recall directed distance number
|
||||
"""
|
||||
Is = [I for I in Is if I is not None] # filter possible None in Is
|
||||
if len(Is) == 0: # there is no prediction in the current area
|
||||
return (math.inf)
|
||||
E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions
|
||||
Js = affiliation_partition([J], E_gt_recall)  # partition of J depending on proximity with Is
|
||||
return (sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J))
|
||||
|
||||
|
||||
def affiliation_recall_proba(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5), E=(0, 8)):
|
||||
"""
|
||||
Compute the individual recall probability from a single ground truth J to Is
|
||||
|
||||
:param Is: list of predicted events within the affiliation_bin zone of J
|
||||
:param J: couple representing the start and stop of a ground truth interval
|
||||
:param E: couple representing the start and stop of the zone of affiliation_bin of J
|
||||
:return: individual recall probability in [0, 1]
|
||||
"""
|
||||
Is = [I for I in Is if I is not None] # filter possible None in Is
|
||||
if len(Is) == 0: # there is no prediction in the current area
|
||||
return (0)
|
||||
E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions
|
||||
Js = affiliation_partition([J], E_gt_recall)  # partition of J depending on proximity with Is
|
||||
return (sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J))
|
||||
evaluation/affiliation_bin/generics.py (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from itertools import groupby
|
||||
from operator import itemgetter
|
||||
import math
|
||||
import gzip
|
||||
import glob
|
||||
import os
|
||||
|
||||
|
||||
def convert_vector_to_events(vector=[0, 1, 1, 0, 0, 1, 0]):
|
||||
"""
|
||||
Convert a binary vector (indicating 1 for the anomalous instances)
|
||||
to a list of events. The events are considered as durations,
|
||||
i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).
|
||||
|
||||
:param vector: a list of elements belonging to {0, 1}
|
||||
:return: a list of couples, each couple representing the start and stop of
|
||||
each event
|
||||
"""
|
||||
positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
|
||||
events = []
|
||||
for k, g in groupby(enumerate(positive_indexes), lambda ix: ix[0] - ix[1]):
|
||||
cur_cut = list(map(itemgetter(1), g))
|
||||
events.append((cur_cut[0], cur_cut[-1]))
|
||||
|
||||
# Consistent conversion in case of range anomalies (for indexes):
|
||||
# A positive index i is considered as the interval [i, i+1),
|
||||
# so the last index should be moved by 1
|
||||
events = [(x, y + 1) for (x, y) in events]
|
||||
|
||||
return (events)
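# Illustrative example (not in the original file):
# convert_vector_to_events([0, 1, 1, 0, 0, 1, 0]) == [(1, 3), (5, 6)]
# i.e. the consecutive 1s at indexes 1-2 become the event [1, 3), and the single 1
# at index 5 becomes [5, 6).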
|
||||
|
||||
|
||||
def infer_Trange(events_pred, events_gt):
|
||||
"""
|
||||
Given the list of events events_pred and events_gt, get the
|
||||
smallest possible Trange corresponding to the start and stop indexes
|
||||
of the whole series.
|
||||
Trange will not influence the measure of distances, but will impact the
|
||||
measures of probabilities.
|
||||
|
||||
:param events_pred: a list of couples corresponding to predicted events
|
||||
:param events_gt: a list of couples corresponding to ground truth events
|
||||
:return: a couple corresponding to the smallest range containing the events
|
||||
"""
|
||||
if len(events_gt) == 0:
|
||||
raise ValueError('The gt events should contain at least one event')
|
||||
if len(events_pred) == 0:
|
||||
# empty prediction, base Trange only on events_gt (which is non empty)
|
||||
return (infer_Trange(events_gt, events_gt))
|
||||
|
||||
min_pred = min([x[0] for x in events_pred])
|
||||
min_gt = min([x[0] for x in events_gt])
|
||||
max_pred = max([x[1] for x in events_pred])
|
||||
max_gt = max([x[1] for x in events_gt])
|
||||
Trange = (min(min_pred, min_gt), max(max_pred, max_gt))
|
||||
return (Trange)
|
||||
|
||||
|
||||
def has_point_anomalies(events):
|
||||
"""
|
||||
Checking whether events contain point anomalies, i.e.
|
||||
events starting and stopping at the same time.
|
||||
|
||||
:param events: a list of couples corresponding to predicted events
|
||||
:return: True if the events have any point anomalies, False otherwise
|
||||
"""
|
||||
if len(events) == 0:
|
||||
return (False)
|
||||
return (min([x[1] - x[0] for x in events]) == 0)
|
||||
|
||||
|
||||
def _sum_wo_nan(vec):
|
||||
"""
|
||||
Sum of elements, ignoring math.isnan ones
|
||||
|
||||
:param vec: vector of floating numbers
|
||||
:return: sum of the elements, ignoring math.isnan ones
|
||||
"""
|
||||
vec_wo_nan = [e for e in vec if not math.isnan(e)]
|
||||
return (sum(vec_wo_nan))
|
||||
|
||||
|
||||
def _len_wo_nan(vec):
|
||||
"""
|
||||
Count of elements, ignoring math.isnan ones
|
||||
|
||||
:param vec: vector of floating numbers
|
||||
:return: count of the elements, ignoring math.isnan ones
|
||||
"""
|
||||
vec_wo_nan = [e for e in vec if not math.isnan(e)]
|
||||
return (len(vec_wo_nan))
|
||||
|
||||
|
||||
def read_gz_data(filename='data/machinetemp_groundtruth.gz'):
|
||||
"""
|
||||
Load a file compressed with gz, such that each line of the
|
||||
file is either 0 (representing a normal instance) or 1 (representing an anomalous instance).
|
||||
:param filename: file path to the gz compressed file
|
||||
:return: list of integers with either 0 or 1
|
||||
"""
|
||||
with gzip.open(filename, 'rb') as f:
|
||||
content = f.read().splitlines()
|
||||
content = [int(x) for x in content]
|
||||
return (content)
|
||||
|
||||
|
||||
def read_all_as_events():
|
||||
"""
|
||||
Load the files contained in the folder `data/` and convert
|
||||
to events. The length of the series is kept.
|
||||
The convention for the file name is: `dataset_algorithm.gz`
|
||||
:return: two dictionaries:
|
||||
- the first containing the list of events for each dataset and algorithm,
|
||||
- the second containing the range of the series for each dataset
|
||||
"""
|
||||
filepaths = glob.glob('data/*.gz')
|
||||
datasets = dict()
|
||||
Tranges = dict()
|
||||
for filepath in filepaths:
|
||||
vector = read_gz_data(filepath)
|
||||
events = convert_vector_to_events(vector)
|
||||
# ad hoc cut for those files
|
||||
cut_filepath = (os.path.split(filepath)[1]).split('_')
|
||||
data_name = cut_filepath[0]
|
||||
algo_name = (cut_filepath[1]).split('.')[0]
|
||||
if not data_name in datasets:
|
||||
datasets[data_name] = dict()
|
||||
Tranges[data_name] = (0, len(vector))
|
||||
datasets[data_name][algo_name] = events
|
||||
return (datasets, Tranges)
|
||||
|
||||
|
||||
def f1_func(p, r):
|
||||
"""
|
||||
Compute the f1 function
|
||||
:param p: precision numeric value
|
||||
:param r: recall numeric value
|
||||
:return: f1 numeric value
|
||||
"""
|
||||
return (2 * p * r / (p + r))
|
||||
evaluation/affiliation_bin/metrics.py (new file, 119 lines)
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from .generics import (
|
||||
infer_Trange,
|
||||
has_point_anomalies,
|
||||
_len_wo_nan,
|
||||
_sum_wo_nan,
|
||||
read_all_as_events)
|
||||
from ._affiliation_zone import (
|
||||
get_all_E_gt_func,
|
||||
affiliation_partition)
|
||||
from ._single_ground_truth_event import (
|
||||
affiliation_precision_distance,
|
||||
affiliation_recall_distance,
|
||||
affiliation_precision_proba,
|
||||
affiliation_recall_proba)
|
||||
|
||||
|
||||
def test_events(events):
|
||||
"""
|
||||
Verify the validity of the input events
|
||||
:param events: list of events, each represented by a couple (start, stop)
|
||||
:return: None. Raises an error for incorrectly formed or non-ordered events
|
||||
"""
|
||||
if type(events) is not list:
|
||||
raise TypeError('Input `events` should be a list of couples')
|
||||
if not all([type(x) is tuple for x in events]):
|
||||
raise TypeError('Input `events` should be a list of tuples')
|
||||
if not all([len(x) == 2 for x in events]):
|
||||
raise ValueError('Input `events` should be a list of couples (start, stop)')
|
||||
if not all([x[0] <= x[1] for x in events]):
|
||||
raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop')
|
||||
if not all([events[i][1] < events[i + 1][0] for i in range(len(events) - 1)]):
|
||||
raise ValueError('Couples of input `events` should be disjoint and ordered')
|
||||
|
||||
|
||||
def pr_from_events(events_pred, events_gt, Trange):
|
||||
"""
|
||||
Compute the affiliation_bin metrics including the precision/recall in [0,1],
|
||||
along with the individual precision/recall distances and probabilities
|
||||
|
||||
:param events_pred: list of predicted events, each represented by a couple
|
||||
indicating the start and the stop of the event
|
||||
:param events_gt: list of ground truth events, each represented by a couple
|
||||
indicating the start and the stop of the event
|
||||
:param Trange: range of the series where events_pred and events_gt are included,
|
||||
represented as a couple (start, stop)
|
||||
:return: dictionary with precision, recall, and the individual metrics
|
||||
"""
|
||||
# testing the inputs
|
||||
test_events(events_pred)
|
||||
test_events(events_gt)
|
||||
|
||||
# other tests
|
||||
minimal_Trange = infer_Trange(events_pred, events_gt)
|
||||
if not Trange[0] <= minimal_Trange[0]:
|
||||
raise ValueError('`Trange` should include all the events')
|
||||
if not minimal_Trange[1] <= Trange[1]:
|
||||
raise ValueError('`Trange` should include all the events')
|
||||
|
||||
if len(events_gt) == 0:
|
||||
raise ValueError('Input `events_gt` should have at least one event')
|
||||
|
||||
if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
|
||||
raise ValueError('Cannot manage point anomalies currently')
|
||||
|
||||
if Trange is None:
|
||||
# Set as default, but Trange should be indicated if probabilities are used
|
||||
raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function)')
|
||||
|
||||
E_gt = get_all_E_gt_func(events_gt, Trange)
|
||||
aff_partition = affiliation_partition(events_pred, E_gt)
|
||||
|
||||
# Computing precision distance
|
||||
d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
|
||||
|
||||
# Computing recall distance
|
||||
d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
|
||||
|
||||
# Computing precision
|
||||
p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
|
||||
|
||||
# Computing recall
|
||||
p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
|
||||
|
||||
if _len_wo_nan(p_precision) > 0:
|
||||
p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision)
|
||||
else:
|
||||
p_precision_average = p_precision[0] # math.nan
|
||||
p_recall_average = sum(p_recall) / len(p_recall)
|
||||
|
||||
dict_out = dict({'precision': p_precision_average,
|
||||
'recall': p_recall_average,
|
||||
'individual_precision_probabilities': p_precision,
|
||||
'individual_recall_probabilities': p_recall,
|
||||
'individual_precision_distances': d_precision,
|
||||
'individual_recall_distances': d_recall})
|
||||
return (dict_out)
|
||||
|
||||
|
||||
def produce_all_results():
|
||||
"""
|
||||
Produce the affiliation_bin precision/recall for all files
|
||||
contained in the `data` repository
|
||||
:return: a dictionary indexed by data names, each containing a dictionary
|
||||
indexed by algorithm names, each containing the results of the affiliation_bin
|
||||
metrics (precision, recall, individual probabilities and distances)
|
||||
"""
|
||||
datasets, Tranges = read_all_as_events() # read all the events in folder `data`
|
||||
results = dict()
|
||||
for data_name in datasets.keys():
|
||||
results_data = dict()
|
||||
for algo_name in datasets[data_name].keys():
|
||||
if algo_name != 'groundtruth':
|
||||
results_data[algo_name] = pr_from_events(datasets[data_name][algo_name],
|
||||
datasets[data_name]['groundtruth'],
|
||||
Tranges[data_name])
|
||||
results[data_name] = results_data
|
||||
return (results)
|
||||
evaluation/f1.py (new file, 12 lines)
@@ -0,0 +1,12 @@
from sklearn.metrics import f1_score, precision_score, recall_score


def evaluate(y_true: list, y_pred: list) -> float:
    """
    F1 evaluation method.
    :param y_true: ground truth labels
    :param y_pred: detected labels
    :return: F1 score
    """
    f1 = f1_score(y_true, y_pred)
    return f1
evaluation/f1pa.py (new file, 66 lines)
@@ -0,0 +1,66 @@
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score


def adjust_predicts(score, label,
                    threshold=None,
                    pred=None,
                    calc_latency=False):
    """
    This function is the official point-adjust implementation.
    Calculate adjusted predict labels using given `score`, `threshold` (or given `pred`) and `label`.
    Args:
        score : The anomaly score
        label : The ground-truth label
        threshold (float): The threshold of anomaly score.
            A point is labeled as "anomaly" if its score is lower than the threshold.
        pred : if not None, adjust `pred` and ignore `score` and `threshold`
        calc_latency (bool): if True, also return the adjustment latency
    Returns:
        np.ndarray: predict labels
    """
    if len(score) != len(label):
        raise ValueError("score and label must have the same length")
    score = np.asarray(score)
    label = np.asarray(label)
    latency = 0
    if pred is None:
        predict = score < threshold
    else:
        predict = pred
    actual = label > 0.1
    anomaly_state = False
    anomaly_count = 0
    for i in range(len(score)):
        if actual[i] and predict[i] and not anomaly_state:
            anomaly_state = True
            anomaly_count += 1
            for j in range(i, 0, -1):
                if not actual[j]:
                    break
                else:
                    if not predict[j]:
                        predict[j] = True
                        latency += 1
        elif not actual[i]:
            anomaly_state = False
        if anomaly_state:
            predict[i] = True
    if calc_latency:
        return predict, latency / (anomaly_count + 1e-4)
    else:
        return predict


def evaluate(y_true: list, y_pred: list) -> float:
    """
    F1PA evaluation method: labels are first adjusted with point adjust, then scored with F1.
    :param y_true: ground truth labels
    :param y_pred: detected labels
    :return: F1 score after point adjustment
    """
    y_true, y_pred = y_true.copy(), y_pred.copy()
    adjust_y_pred = adjust_predicts(score=np.array([0] * len(y_true)), label=y_true, pred=y_pred)
    f1 = f1_score(y_true, adjust_y_pred)
    return f1
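A small worked example of the point adjustment (not part of the committed file): once any point inside a true anomaly segment is flagged, the whole segment is counted as detected, while untouched segments stay missed.

import numpy as np
from evaluation.f1pa import adjust_predicts

y_true = [0, 1, 1, 1, 0, 0, 1, 1]
y_pred = [0, 0, 1, 0, 0, 0, 0, 0]
adjusted = adjust_predicts(score=np.zeros(len(y_true)), label=y_true, pred=y_pred)
# adjusted is truthy exactly on [0, 1, 1, 1, 0, 0, 0, 0]: the first segment is
# fully credited, the second segment (never hit) remains undetected, and
# f1_score is then computed on these adjusted labels.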
evaluation/ftad.py (new file, 156 lines)
@@ -0,0 +1,156 @@
import math
import numpy as np


def evaluate(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
    """
    Compute the F score over anomaly segments.

    :param y_true: ground-truth labels
    :param y_pred: detection labels
    :param pos_label: target value to detect, i.e. which value marks an anomaly
    :param max_segment: maximum length of an anomaly segment
    :return: segment F score
    """
    p_tad = precision_tad(y_true=y_true, y_pred=y_pred, pos_label=pos_label, max_segment=max_segment)
    r_tad = recall_tad(y_true=y_true, y_pred=y_pred, pos_label=pos_label, max_segment=max_segment)
    score = 0
    if p_tad and r_tad:
        score = 2 * p_tad * r_tad / (p_tad + r_tad)
    return score


def recall_tad(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
    """
    Compute recall over anomaly segments.

    :param y_true: ground-truth labels
    :param y_pred: detection labels
    :param pos_label: target value to detect, i.e. which value marks an anomaly
    :param max_segment: maximum length of an anomaly segment
    :return: segment recall
    """
    if max_segment == 0:
        max_segment = get_max_segment(y_true=y_true, pos_label=pos_label)
    score = tp_count(y_true, y_pred, pos_label=pos_label, max_segment=max_segment)
    return score


def precision_tad(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
    """
    Compute precision over anomaly segments.

    :param y_true: ground-truth labels
    :param y_pred: detection labels
    :param pos_label: target value to detect, i.e. which value marks an anomaly
    :param max_segment: maximum length of an anomaly segment
    :return: segment precision
    """
    if max_segment == 0:
        max_segment = get_max_segment(y_true=y_true, pos_label=pos_label)
    score = tp_count(y_pred, y_true, pos_label=pos_label, max_segment=max_segment)
    return score


def tp_count(y_true: [int], y_pred: [int], max_segment: int = 0, pos_label: int = 1) -> float:
    """
    Compute the segment score; swapping y_true and y_pred yields recall and precision respectively.

    :param y_true: ground-truth labels
    :param y_pred: detection labels
    :param pos_label: target value to detect, i.e. which value marks an anomaly
    :param max_segment: maximum length of an anomaly segment
    :return: score
    """
    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred should have the same length.")
    neg_label = 1 - pos_label
    position = 0
    tp_list = []
    if max_segment == 0:
        raise ValueError("max segment length is 0")
    while position < len(y_true):
        if y_true[position] == neg_label:
            position += 1
            continue
        elif y_true[position] == pos_label:
            start = position
            while position < len(y_true) and y_true[position] == pos_label and position - start < max_segment:
                position += 1
            end = position
            true_window = [weight_line(i / (end - start)) for i in range(end - start)]
            true_window = softmax(true_window)
            pred_window = np.array(y_pred[start:end])
            pred_window = np.where(pred_window == pos_label, true_window, 0)
            tp_list.append(sum(pred_window))
        else:
            raise ValueError("label value must be 0 or 1")
    score = sum(tp_list) / len(tp_list) if len(tp_list) > 0 else 0
    return score


def weight_line(position: float) -> float:
    """
    Assign a weight to a point according to its position on the weight curve.

    :param position: relative position of the point within the segment, in [0, 1]
    :return: weight value
    """
    if position < 0 or position > 1:
        raise ValueError(f"point position in a segment must be between 0 and 1, got {position}")
    sigma = 1 / (1 + math.exp(10 * (position - 0.5)))
    return sigma


def softmax(x: [float]) -> [float]:
    """
    Softmax function.

    :param x: data of one anomaly segment
    :return: the segment after softmax
    """
    ret = np.exp(x) / np.sum(np.exp(x), axis=0)
    return ret.tolist()


def get_max_segment(y_true: [int], pos_label: int = 1) -> int:
    """
    Get the length of the longest anomaly segment.

    :param y_true: ground-truth labels
    :param pos_label: value of the anomaly label
    :return: maximum length
    """
    max_num, i = 0, 0
    neg_label = 1 - pos_label
    while i < len(y_true):
        if y_true[i] == neg_label:
            i += 1
            continue
        elif y_true[i] == pos_label:
            start = i
            while i < len(y_true) and y_true[i] == pos_label:
                i += 1
            end = i
            max_num = max(max_num, end - start)
        else:
            raise ValueError("label value must be 0 or 1")
    return max_num


if __name__ == "__main__":
    # y_true = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    #           0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # y_pred = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    #           0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    import pandas as pd
    data = pd.read_csv("../records/2023-04-10_10-30-27/detection_result/MtadGatAtt_SWAT.csv")
    y_true = data["true"].tolist()
    y_pred = data["ftad"].tolist()

    print(evaluate(y_true, y_pred))
    # print(precision_tad(y_true, y_pred))
    # print(recall_tad(y_true, y_pred))
    # from sklearn.metrics import f1_score, precision_score, recall_score
    # print(f1_score(y_true, y_pred))
    # print(precision_score(y_true, y_pred))
    # print(recall_score(y_true, y_pred))
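For orientation, a minimal usage sketch of the segment-based metric above, run on short synthetic labels rather than the repository's detection CSVs (it assumes the repository root is on the Python path so evaluation/ftad.py is importable):

# Illustrative sketch only, not part of the committed file.
from evaluation.ftad import evaluate, precision_tad, recall_tad

y_true = [0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0]   # two anomaly segments
y_pred = [0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0]
print(evaluate(y_true, y_pred))        # segment-weighted F score
print(precision_tad(y_true, y_pred))   # segment-weighted precision
print(recall_tad(y_true, y_pred))      # segment-weighted recall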
10
evaluation/template.py
Normal file
@@ -0,0 +1,10 @@
def evaluate(y_true: [int], y_pred: [int]) -> float:
    """
    Evaluation method.
    :param y_true: ground-truth labels
    :param y_pred: detection labels
    :return: score, as a float
    """
    return 0.0
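Any file placed under ./evaluation that exposes this evaluate signature can be listed in config.ini. As a hedged illustration (a hypothetical evaluation/accuracy.py, not part of this commit), a point-wise accuracy metric following the template would look like:

def evaluate(y_true: [int], y_pred: [int]) -> float:
    """Point-wise accuracy, following the template's signature."""
    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred should have the same length.")
    if not y_true:
        return 0.0
    correct = sum(1 for t, p in zip(y_true, y_pred) if t == p)
    return correct / len(y_true)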
169
main.py
Normal file
@@ -0,0 +1,169 @@
# -*- coding:utf-8 -*-
import configparser
from daemon import Daemon
import sys
from torch.cuda import is_available
from loguru import logger
from datetime import datetime
from utils import create_all_dataloader, train_and_test_model
import method
import gc
import traceback
import pandas
import os


class Config(configparser.ConfigParser):
    def __init__(self, defaults=None):
        configparser.ConfigParser.__init__(self, defaults=defaults)

    def optionxform(self, optionstr):
        return optionstr

    def as_dict(self):
        d = dict(self._sections)
        for k in d:
            d[k] = dict(d[k])
        return d


class Main(Daemon):
    def __init__(self, pidfile):
        super(Main, self).__init__(pidfile=pidfile)
        current = datetime.now()
        self.start_time = current.strftime("%Y-%m-%d_%H-%M-%S")

        if len(sys.argv) == 1 or sys.argv[1] == "start":
            self.run()
        elif sys.argv[1] == "stop":
            self.stop()
        elif sys.argv[1] == "daemon":
            self.start()
        else:
            print("Input format error. Please input: python3 main.py start|stop|daemon")
            sys.exit(0)

    def run(self):
        # Read the parameters from the configuration file
        cf = Config()
        cf.read("./config.ini", encoding='utf8')
        cf_dict = cf.as_dict()
        # Read the dataset names
        dataset_names = cf.get("Dataset", "name")
        datasets = dataset_names.split(",")
        datasets = [name.strip() for name in datasets]
        # Read the model names
        model_names = cf.get("Method", "name")
        models = model_names.split(",")
        models = [name.strip() for name in models]
        # Read the preprocessing method
        preprocess_name = cf.get("Preprocess", "name")
        # Read the evaluation methods
        evaluation_names = cf.get("Evaluation", "name")
        evaluations = evaluation_names.split(",")
        evaluations = [name.strip() for name in evaluations]
        # Read the model parameter file paths
        model_path = cf_dict["ModelPath"]
        # Read the training parameters
        train = cf.getboolean("BaseParameters", "train")
        epochs = cf.getint("BaseParameters", "epochs")
        batch_size = cf.getint("BaseParameters", "batch_size")
        learning_rate = cf.getfloat("BaseParameters", "learning_rate")
        device = cf.get("BaseParameters", "device")
        if device == "auto":
            device = 'cuda:0' if is_available() else 'cpu'
        # Read the custom parameters
        customs = cf_dict["CustomParameters"]

        # Create the record directories for this run
        os.makedirs(f"./records", exist_ok=True)
        os.makedirs(f"./records/{self.start_time}", exist_ok=True)
        os.makedirs(f"./records/{self.start_time}/detection_result", exist_ok=True)
        os.makedirs(f"./records/{self.start_time}/model", exist_ok=True)

        # Initialize logging
        logger.add(f"./records/{self.start_time}/log",
                   level='DEBUG',
                   format='{time:YYYY-MM-DD HH:mm:ss} - {level} - {file} - {line} - {message}',
                   rotation="100 MB")

        # Core routine
        self.core(models, datasets, preprocess_name, evaluations, model_path, train, epochs, batch_size, learning_rate,
                  device, customs)
        logger.info(f"Experiments finished, shutting down the process")

    def core(self, models: [str], datasets: [str], preprocess_name: str, evaluations: [str], model_path: {}, train: bool,
             epochs: int, batch_size: int, learning_rate: float, device: str, customs: {}):
        """
        Initialize the datasets and models, then start training and testing.
        :param models: names of the models to train, possibly several
        :param datasets: names of the datasets to use, possibly several
        :param preprocess_name: name of the preprocessing method
        :param evaluations: names of the evaluation methods, possibly several
        :param model_path: paths of model parameters to load, possibly several
        :param train: whether to train; if False, the models are only tested
        :param epochs: total number of training epochs
        :param batch_size: batch size
        :param learning_rate: learning rate
        :param device: device
        :param customs: custom parameters
        """
        logger.info(f"Loading datasets")
        try:
            # Initialize all dataloaders
            all_dataloader = create_all_dataloader(datasets=datasets, input_size=int(customs["input_size"]),
                                                   output_size=int(customs["output_size"]), step=int(customs["step"]),
                                                   batch_size=batch_size, time_index=customs["time_index"] == "true",
                                                   del_column_name=customs["del_column_name"] == "true",
                                                   preprocess_name=preprocess_name)
        except RuntimeError:
            logger.error(traceback.format_exc())
            return

        # Start training and testing
        for model_name in models:
            try:
                logger.info(f"------------ Separator: experiments for {model_name} start ------------")
                for i in range(len(all_dataloader)):
                    dataloader = all_dataloader[i]
                    all_score = {}
                    for sub_dataloader in dataloader:
                        dataset_name = sub_dataloader["dataset_name"]
                        normal_dataloader = sub_dataloader["normal"]
                        attack_dataloader = sub_dataloader["attack"]
                        logger.info(f"Initializing model {model_name}")
                        model = eval(f"method.{model_name}.Model")(customs=customs, dataloader=normal_dataloader)
                        model = model.to(device)
                        logger.info(f"Model initialization finished")
                        pth_name = model_path[f"{model_name}_{dataset_name}"] if f"{model_name}_{dataset_name}" \
                            in model_path else None
                        best_score, best_detection = train_and_test_model(start_time=self.start_time, epochs=epochs,
                                                                          normal_dataloader=normal_dataloader,
                                                                          attack_dataloader=attack_dataloader,
                                                                          model=model, evaluations=evaluations,
                                                                          device=device, lr=learning_rate,
                                                                          model_path=pth_name, train=train)
                        # Save the labels of the best detection result as a csv file
                        best_detection = pandas.DataFrame(best_detection)
                        best_detection.to_csv(f"./records/{self.start_time}/detection_result/{model_name}_{dataset_name}.csv", index=False)
                        for evaluation_name in evaluations:
                            if evaluation_name not in all_score:
                                all_score[evaluation_name] = []
                            all_score[evaluation_name].append(best_score[evaluation_name])
                        gc.collect()
                    logger.info(f"------------------------")
                    logger.info(f"Experiments for {model_name} / {datasets[i]} finished")
                    for evaluation_name in all_score:
                        logger.info(f"{evaluation_name}: {'{:.3f}'.format(sum(all_score[evaluation_name]) / len(all_score[evaluation_name]))}")
                    logger.info(f"------------------------")
                logger.info(f"------------ Separator: experiments for {model_name} end ------------")
            except RuntimeError:
                logger.error(traceback.format_exc())
                return


if __name__ == '__main__':
    pidpath = "/tmp/command_detection.pid"
    app = Main(pidpath)
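A hedged sketch of how the [CustomParameters] section of config.ini reaches the model constructors through the case-preserving Config class above (hypothetical values; it assumes the script is run from the TSAD root where config.ini lives):

# Illustrative sketch only, not part of the committed file.
from main import Config

cf = Config()
cf.read("./config.ini", encoding="utf8")
customs = cf.as_dict()["CustomParameters"]
window = int(customs["input_size"])             # e.g. 100, the sliding-window length
step = int(customs["step"])                     # e.g. 1, the window stride
use_time_index = customs["time_index"] == "true"
print(window, step, use_time_index)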
305
method/AnomalyTransformer.py
Normal file
@@ -0,0 +1,305 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
from math import sqrt
import torch.utils.data as tud


class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        pe.require_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(1)]


class TokenEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = 1 if torch.__version__ >= '1.5.0' else 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
        return x


class DataEmbedding(nn.Module):
    def __init__(self, c_in, d_model, dropout=0.0):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        x = self.value_embedding(x) + self.position_embedding(x)
        return self.dropout(x)


class TriangularCausalMask():
    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

    @property
    def mask(self):
        return self._mask


class AnomalyAttention(nn.Module):
    def __init__(self, win_size, mask_flag=True, scale=None, attention_dropout=0.0, output_attention=False):
        super(AnomalyAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)
        window_size = win_size
        self.distances = torch.zeros((window_size, window_size)).cuda()
        for i in range(window_size):
            for j in range(window_size):
                self.distances[i][j] = abs(i - j)

    def forward(self, queries, keys, values, sigma, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)
        attn = scale * scores

        sigma = sigma.transpose(1, 2)  # B L H -> B H L
        window_size = attn.shape[-1]
        sigma = torch.sigmoid(sigma * 5) + 1e-5
        sigma = torch.pow(3, sigma) - 1
        sigma = sigma.unsqueeze(-1).repeat(1, 1, 1, window_size)  # B H L L
        prior = self.distances.unsqueeze(0).unsqueeze(0).repeat(sigma.shape[0], sigma.shape[1], 1, 1).cuda()
        prior = 1.0 / (math.sqrt(2 * math.pi) * sigma) * torch.exp(-prior ** 2 / 2 / (sigma ** 2))

        series = self.dropout(torch.softmax(attn, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", series, values)

        if self.output_attention:
            return (V.contiguous(), series, prior, sigma)
        else:
            return (V.contiguous(), None)


class AttentionLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AttentionLayer, self).__init__()

        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)
        self.norm = nn.LayerNorm(d_model)
        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model,
                                          d_keys * n_heads)
        self.key_projection = nn.Linear(d_model,
                                        d_keys * n_heads)
        self.value_projection = nn.Linear(d_model,
                                          d_values * n_heads)
        self.sigma_projection = nn.Linear(d_model,
                                          n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)

        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads
        x = queries
        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)
        sigma = self.sigma_projection(x).view(B, L, H)

        out, series, prior, sigma = self.inner_attention(
            queries,
            keys,
            values,
            sigma,
            attn_mask
        )
        out = out.view(B, L, -1)

        return self.out_projection(out), series, prior, sigma


class EncoderLayer(nn.Module):
    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        new_x, attn, mask, sigma = self.attention(
            x, x, x,
            attn_mask=attn_mask
        )
        x = x + self.dropout(new_x)
        y = x = self.norm1(x)
        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
        y = self.dropout(self.conv2(y).transpose(-1, 1))

        return self.norm2(x + y), attn, mask, sigma


class Encoder(nn.Module):
    def __init__(self, attn_layers, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        series_list = []
        prior_list = []
        sigma_list = []
        for attn_layer in self.attn_layers:
            x, series, prior, sigma = attn_layer(x, attn_mask=attn_mask)
            series_list.append(series)
            prior_list.append(prior)
            sigma_list.append(sigma)

        if self.norm is not None:
            x = self.norm(x)

        return x, series_list, prior_list, sigma_list


class Model(nn.Module):
    def __init__(self, customs: {}, dataloader: tud.DataLoader):
        super(Model, self).__init__()
        win_size = int(customs["input_size"])
        enc_in = c_out = dataloader.dataset.train_inputs.shape[-1]
        d_model = 512
        n_heads = 8
        e_layers = 3
        d_ff = 512
        dropout = 0.0
        activation = 'gelu'
        output_attention = True
        self.k = 3
        self.win_size = win_size

        self.name = "AnomalyTransformer"
        # Encoding
        self.embedding = DataEmbedding(enc_in, d_model, dropout)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        AnomalyAttention(win_size, False, attention_dropout=dropout, output_attention=output_attention),
                        d_model, n_heads),
                    d_model,
                    d_ff,
                    dropout=dropout,
                    activation=activation
                ) for l in range(e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(d_model)
        )

        self.projection = nn.Linear(d_model, c_out, bias=True)

    def forward(self, x):
        enc_out = self.embedding(x)
        enc_out, series, prior, sigmas = self.encoder(enc_out)
        enc_out = self.projection(enc_out)
        return enc_out, series, prior, sigmas

    @staticmethod
    def my_kl_loss(p, q):
        res = p * (torch.log(p + 0.0001) - torch.log(q + 0.0001))
        return torch.mean(torch.sum(res, dim=-1), dim=1)

    def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
        output, series, prior, _ = self.forward(x)
        series_loss = 0.0
        prior_loss = 0.0
        for u in range(len(prior)):
            series_loss += (torch.mean(self.my_kl_loss(series[u], (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)).detach())) +
                            torch.mean(self.my_kl_loss((prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)).detach(), series[u])))

            prior_loss += (torch.mean(self.my_kl_loss((prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)), series[u].detach())) +
                           torch.mean(self.my_kl_loss(series[u].detach(), (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)))))
        series_loss = series_loss / len(prior)
        prior_loss = prior_loss / len(prior)
        rec_loss = nn.MSELoss()(output, x)

        loss1 = rec_loss - self.k * series_loss
        loss2 = rec_loss + self.k * prior_loss

        # Minimax strategy
        loss1.backward(retain_graph=True)
        loss2.backward()

        return loss1.item()

    def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
        temperature = 50
        output, series, prior, _ = self.forward(x)

        loss = torch.mean(nn.MSELoss()(x, output), dim=-1)

        series_loss = 0.0
        prior_loss = 0.0
        for u in range(len(prior)):
            if u == 0:
                series_loss = self.my_kl_loss(series[u], (
                        prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
                                                                                               self.win_size)).detach()) * temperature
                prior_loss = self.my_kl_loss(
                    (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
                                                                                            self.win_size)),
                    series[u].detach()) * temperature
            else:
                series_loss += self.my_kl_loss(series[u], (
                        prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
                                                                                               self.win_size)).detach()) * temperature
                prior_loss += self.my_kl_loss(
                    (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
                                                                                            self.win_size)),
                    series[u].detach()) * temperature
        metric = torch.softmax((-series_loss - prior_loss), dim=-1)

        cri = metric * loss
        cri = cri.mean(dim=-1)
        return cri, None
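The loss and detection methods above both build on my_kl_loss, which reduces a KL-style discrepancy between two attention maps over the last dimension and then averages over the head dimension. A minimal shape sketch of that reduction on toy tensors (standalone, independent of the model and its datasets):

# Illustrative sketch only, not part of the committed file.
import torch

# p and q stand in for series[u] and the normalized prior[u]:
# shape (batch, heads, win_size, win_size).
p = torch.softmax(torch.randn(2, 8, 100, 100), dim=-1)
q = torch.softmax(torch.randn(2, 8, 100, 100), dim=-1)
res = p * (torch.log(p + 0.0001) - torch.log(q + 0.0001))
kl = torch.mean(torch.sum(res, dim=-1), dim=1)   # same reduction as my_kl_loss
print(kl.shape)                                  # torch.Size([2, 100]) -> one value per (sample, position)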
414
method/MtadGat.py
Normal file
@@ -0,0 +1,414 @@
"""
Third-party implementation of MTAD-GAT found on GitHub.
The original code used torch.empty, which often made the loss become NaN, so it was replaced with torch.zeros.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as tud


class ConvLayer(nn.Module):
    """1-D Convolution layer to extract high-level features of each time-series input
    :param n_features: Number of input features/nodes
    :param window_size: length of the input sequence
    :param kernel_size: size of kernel to use in the convolution operation
    """

    def __init__(self, n_features, kernel_size=7):
        super(ConvLayer, self).__init__()
        self.padding = nn.ConstantPad1d((kernel_size - 1) // 2, 0.0)
        self.conv = nn.Conv1d(in_channels=n_features, out_channels=n_features, kernel_size=kernel_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.padding(x)
        x = self.relu(self.conv(x))
        return x.permute(0, 2, 1)  # Permute back


class FeatureAttentionLayer(nn.Module):
    """Single Graph Feature/Spatial Attention Layer
    :param n_features: Number of input features/nodes
    :param window_size: length of the input sequence
    :param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky relu activation function
    :param embed_dim: embedding dimension (output dimension of linear transformation)
    :param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
    :param use_bias: whether to include a bias term in the attention layer
    """

    def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True):
        super(FeatureAttentionLayer, self).__init__()
        self.n_features = n_features
        self.window_size = window_size
        self.dropout = dropout
        self.embed_dim = embed_dim if embed_dim is not None else window_size
        self.use_gatv2 = use_gatv2
        self.num_nodes = n_features
        self.use_bias = use_bias

        # Because linear transformation is done after concatenation in GATv2
        if self.use_gatv2:
            self.embed_dim *= 2
            lin_input_dim = 2 * window_size
            a_input_dim = self.embed_dim
        else:
            lin_input_dim = window_size
            a_input_dim = 2 * self.embed_dim

        self.lin = nn.Linear(lin_input_dim, self.embed_dim)
        # self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
        self.a = nn.Parameter(torch.zeros((a_input_dim, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        if self.use_bias:
            # self.bias = nn.Parameter(torch.empty(n_features, n_features))
            self.bias = nn.Parameter(torch.zeros(n_features, n_features))

        self.leakyrelu = nn.LeakyReLU(alpha)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # x shape (b, n, k): b - batch size, n - window size, k - number of features
        # For feature attention we represent a node as the values of a particular feature across all timestamps

        x = x.permute(0, 2, 1)

        # 'Dynamic' GAT attention
        # Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
        # Linear transformation applied after concatenation and attention layer applied after leakyrelu
        if self.use_gatv2:
            a_input = self._make_attention_input(x)                       # (b, k, k, 2*window_size)
            a_input = self.leakyrelu(self.lin(a_input))                   # (b, k, k, embed_dim)
            e = torch.matmul(a_input, self.a).squeeze(3)                  # (b, k, k, 1)

        # Original GAT attention
        else:
            Wx = self.lin(x)                                              # (b, k, k, embed_dim)
            a_input = self._make_attention_input(Wx)                      # (b, k, k, 2*embed_dim)
            e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3)  # (b, k, k, 1)

        if self.use_bias:
            e += self.bias

        # Attention weights
        attention = torch.softmax(e, dim=2)
        attention = torch.dropout(attention, self.dropout, train=self.training)

        # Computing new node features using the attention
        h = self.sigmoid(torch.matmul(attention, x))

        return h.permute(0, 2, 1)

    def _make_attention_input(self, v):
        """Preparing the feature attention mechanism.
        Creating matrix with all possible combinations of concatenations of node.
        Each node consists of all values of that node within the window
            v1 || v1,
            ...
            v1 || vK,
            v2 || v1,
            ...
            v2 || vK,
            ...
            ...
            vK || v1,
            ...
            vK || vK,
        """

        K = self.num_nodes
        blocks_repeating = v.repeat_interleave(K, dim=1)  # Left-side of the matrix
        blocks_alternating = v.repeat(1, K, 1)  # Right-side of the matrix
        combined = torch.cat((blocks_repeating, blocks_alternating), dim=2)  # (b, K*K, 2*window_size)

        if self.use_gatv2:
            return combined.view(v.size(0), K, K, 2 * self.window_size)
        else:
            return combined.view(v.size(0), K, K, 2 * self.embed_dim)


class TemporalAttentionLayer(nn.Module):
    """Single Graph Temporal Attention Layer
    :param n_features: number of input features/nodes
    :param window_size: length of the input sequence
    :param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky relu activation function
    :param embed_dim: embedding dimension (output dimension of linear transformation)
    :param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
    :param use_bias: whether to include a bias term in the attention layer

    """

    def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True):
        super(TemporalAttentionLayer, self).__init__()
        self.n_features = n_features
        self.window_size = window_size
        self.dropout = dropout
        self.use_gatv2 = use_gatv2
        self.embed_dim = embed_dim if embed_dim is not None else n_features
        self.num_nodes = window_size
        self.use_bias = use_bias

        # Because linear transformation is performed after concatenation in GATv2
        if self.use_gatv2:
            self.embed_dim *= 2
            lin_input_dim = 2 * n_features
            a_input_dim = self.embed_dim
        else:
            lin_input_dim = n_features
            a_input_dim = 2 * self.embed_dim

        self.lin = nn.Linear(lin_input_dim, self.embed_dim)
        # self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
        self.a = nn.Parameter(torch.zeros((a_input_dim, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        if self.use_bias:
            # self.bias = nn.Parameter(torch.empty(window_size, window_size))
            self.bias = nn.Parameter(torch.zeros(window_size, window_size))
        self.leakyrelu = nn.LeakyReLU(alpha)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # x shape (b, n, k): b - batch size, n - window size, k - number of features
        # For temporal attention a node is represented as all feature values at a specific timestamp

        # 'Dynamic' GAT attention
        # Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
        # Linear transformation applied after concatenation and attention layer applied after leakyrelu
        if self.use_gatv2:
            a_input = self._make_attention_input(x)                       # (b, n, n, 2*n_features)
            a_input = self.leakyrelu(self.lin(a_input))                   # (b, n, n, embed_dim)
            e = torch.matmul(a_input, self.a).squeeze(3)                  # (b, n, n, 1)

        # Original GAT attention
        else:
            Wx = self.lin(x)                                              # (b, n, n, embed_dim)
            a_input = self._make_attention_input(Wx)                      # (b, n, n, 2*embed_dim)
            e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3)  # (b, n, n, 1)

        if self.use_bias:
            e += self.bias  # (b, n, n, 1)

        # Attention weights
        attention = torch.softmax(e, dim=2)
        attention = torch.dropout(attention, self.dropout, train=self.training)

        h = self.sigmoid(torch.matmul(attention, x))  # (b, n, k)

        return h

    def _make_attention_input(self, v):
        """Preparing the temporal attention mechanism.
        Creating matrix with all possible combinations of concatenations of node values:
            (v1, v2..)_t1 || (v1, v2..)_t1
            (v1, v2..)_t1 || (v1, v2..)_t2

            ...
            ...

            (v1, v2..)_tn || (v1, v2..)_t1
            (v1, v2..)_tn || (v1, v2..)_t2

        """

        K = self.num_nodes
        blocks_repeating = v.repeat_interleave(K, dim=1)  # Left-side of the matrix
        blocks_alternating = v.repeat(1, K, 1)  # Right-side of the matrix
        combined = torch.cat((blocks_repeating, blocks_alternating), dim=2)

        if self.use_gatv2:
            return combined.view(v.size(0), K, K, 2 * self.n_features)
        else:
            return combined.view(v.size(0), K, K, 2 * self.embed_dim)


class GRULayer(nn.Module):
    """Gated Recurrent Unit (GRU) Layer
    :param in_dim: number of input features
    :param hid_dim: hidden size of the GRU
    :param n_layers: number of layers in GRU
    :param dropout: dropout rate
    """

    def __init__(self, in_dim, hid_dim, n_layers, dropout):
        super(GRULayer, self).__init__()
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.dropout = 0.0 if n_layers == 1 else dropout
        self.gru = nn.GRU(in_dim, hid_dim, num_layers=n_layers, batch_first=True, dropout=self.dropout)

    def forward(self, x):
        out, h = self.gru(x)
        out, h = out[-1, :, :], h[-1, :, :]  # Extracting from last layer
        return out, h


class RNNDecoder(nn.Module):
    """GRU-based Decoder network that converts latent vector into output
    :param in_dim: number of input features
    :param n_layers: number of layers in RNN
    :param hid_dim: hidden size of the RNN
    :param dropout: dropout rate
    """

    def __init__(self, in_dim, hid_dim, n_layers, dropout):
        super(RNNDecoder, self).__init__()
        self.in_dim = in_dim
        self.dropout = 0.0 if n_layers == 1 else dropout
        self.rnn = nn.GRU(in_dim, hid_dim, n_layers, batch_first=True, dropout=self.dropout)

    def forward(self, x):
        decoder_out, _ = self.rnn(x)
        return decoder_out


class ReconstructionModel(nn.Module):
    """Reconstruction Model
    :param window_size: length of the input sequence
    :param in_dim: number of input features
    :param n_layers: number of layers in RNN
    :param hid_dim: hidden size of the RNN
    :param out_dim: number of output features
    :param dropout: dropout rate
    """

    def __init__(self, window_size, in_dim, hid_dim, out_dim, n_layers, dropout):
        super(ReconstructionModel, self).__init__()
        self.window_size = window_size
        self.decoder = RNNDecoder(in_dim, hid_dim, n_layers, dropout)
        self.fc = nn.Linear(hid_dim, out_dim)

    def forward(self, x):
        # x will be last hidden state of the GRU layer
        h_end = x
        h_end_rep = h_end.repeat_interleave(self.window_size, dim=1).view(x.size(0), self.window_size, -1)

        decoder_out = self.decoder(h_end_rep)
        out = self.fc(decoder_out)
        return out


class Forecasting_Model(nn.Module):
    """Forecasting model (fully-connected network)
    :param in_dim: number of input features
    :param hid_dim: hidden size of the FC network
    :param out_dim: number of output features
    :param n_layers: number of FC layers
    :param dropout: dropout rate
    """

    def __init__(self, in_dim, hid_dim, out_dim, n_layers, dropout):
        super(Forecasting_Model, self).__init__()
        layers = [nn.Linear(in_dim, hid_dim)]
        for _ in range(n_layers - 1):
            layers.append(nn.Linear(hid_dim, hid_dim))

        layers.append(nn.Linear(hid_dim, out_dim))

        self.layers = nn.ModuleList(layers)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x):
        for i in range(len(self.layers) - 1):
            x = self.relu(self.layers[i](x))
            x = self.dropout(x)
        return self.layers[-1](x)


class Model(nn.Module):
    """ MTAD_GAT model class.

    :param n_features: Number of input features
    :param window_size: Length of the input sequence
    :param out_dim: Number of features to output
    :param kernel_size: size of kernel to use in the 1-D convolution
    :param feat_gat_embed_dim: embedding dimension (output dimension of linear transformation)
           in feat-oriented GAT layer
    :param time_gat_embed_dim: embedding dimension (output dimension of linear transformation)
           in time-oriented GAT layer
    :param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
    :param gru_n_layers: number of layers in the GRU layer
    :param gru_hid_dim: hidden dimension in the GRU layer
    :param forecast_n_layers: number of layers in the FC-based Forecasting Model
    :param forecast_hid_dim: hidden dimension in the FC-based Forecasting Model
    :param recon_n_layers: number of layers in the GRU-based Reconstruction Model
    :param recon_hid_dim: hidden dimension in the GRU-based Reconstruction Model
    :param dropout: dropout rate
    :param alpha: negative slope used in the leaky relu activation function

    """

    def __init__(self, customs: {}, dataloader: tud.DataLoader):
        super(Model, self).__init__()
        n_features = dataloader.dataset.train_inputs.shape[-1]
        window_size = int(customs["input_size"])
        out_dim = n_features
        kernel_size = 7
        feat_gat_embed_dim = None
        time_gat_embed_dim = None
        use_gatv2 = True
        gru_n_layers = 1
        gru_hid_dim = 150
        forecast_n_layers = 1
        forecast_hid_dim = 150
        recon_n_layers = 1
        recon_hid_dim = 150
        dropout = 0.2
        alpha = 0.2

        self.name = "MtadGat"
        self.conv = ConvLayer(n_features, kernel_size)
        self.feature_gat = FeatureAttentionLayer(
            n_features, window_size, dropout, alpha, feat_gat_embed_dim, use_gatv2)
        self.temporal_gat = TemporalAttentionLayer(n_features, window_size, dropout, alpha, time_gat_embed_dim,
                                                   use_gatv2)
        self.gru = GRULayer(3 * n_features, gru_hid_dim, gru_n_layers, dropout)
        self.forecasting_model = Forecasting_Model(
            gru_hid_dim, forecast_hid_dim, out_dim, forecast_n_layers, dropout)
        self.recon_model = ReconstructionModel(window_size, gru_hid_dim, recon_hid_dim, out_dim, recon_n_layers,
                                               dropout)

    def forward(self, x):
        # x shape (b, n, k): b - batch size, n - window size, k - number of features

        x = self.conv(x)
        h_feat = self.feature_gat(x)
        h_temp = self.temporal_gat(x)

        h_cat = torch.cat([x, h_feat, h_temp], dim=2)  # (b, n, 3k)

        _, h_end = self.gru(h_cat)
        h_end = h_end.view(x.shape[0], -1)  # Hidden state for last timestamp

        predictions = self.forecasting_model(h_end)
        recons = self.recon_model(h_end)

        return predictions, recons

    def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
        preds, recons = self.forward(x)
        if preds.ndim == 3:
            preds = preds.squeeze(1)
        if y_true.ndim == 3:
            y_true = y_true.squeeze(1)
        forecast_criterion = nn.MSELoss()
        recon_criterion = nn.MSELoss()
        forecast_loss = torch.sqrt(forecast_criterion(y_true, preds))
        recon_loss = torch.sqrt(recon_criterion(x, recons))
        loss = forecast_loss + recon_loss
        loss.backward()
        return loss.item()

    def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
        preds, recons = self.forward(x)
        score = F.pairwise_distance(recons.reshape(recons.size(0), -1), x.reshape(x.size(0), -1)) + \
                F.pairwise_distance(y_true.reshape(y_true.size(0), -1), preds.reshape(preds.size(0), -1))
        return score, None
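As a shape sanity check for the two attention layers above, the following sketch mirrors what Model.forward does before the GRU. It assumes the classes are importable from method.MtadGat; the feature count 51 is only an example value (roughly SWAT-sized data), not taken from the commit:

# Illustrative sketch only, not part of the committed file.
import torch
from method.MtadGat import FeatureAttentionLayer, TemporalAttentionLayer

b, n, k = 4, 100, 51                      # batch, window size, number of features
x = torch.randn(b, n, k)
feat_gat = FeatureAttentionLayer(n_features=k, window_size=n, dropout=0.2, alpha=0.2)
temp_gat = TemporalAttentionLayer(n_features=k, window_size=n, dropout=0.2, alpha=0.2)
h_cat = torch.cat([x, feat_gat(x), temp_gat(x)], dim=2)
print(h_cat.shape)                        # torch.Size([4, 100, 153]) -> (b, n, 3k)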
647
method/MtadGatAtt.py
Normal file
@@ -0,0 +1,647 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from math import sqrt
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
import torch.utils.data as tud
|
||||
|
||||
|
||||
class ConvLayer(nn.Module):
|
||||
"""1-D Convolution layer to extract high-level features of each time-series input
|
||||
:param n_features: Number of input features/nodes
|
||||
:param window_size: length of the input sequence
|
||||
:param kernel_size: size of kernel to use in the convolution operation
|
||||
"""
|
||||
|
||||
def __init__(self, n_features, kernel_size=7):
|
||||
super(ConvLayer, self).__init__()
|
||||
self.padding = nn.ConstantPad1d((kernel_size - 1) // 2, 0.0)
|
||||
self.conv = nn.Conv1d(in_channels=n_features, out_channels=n_features, kernel_size=kernel_size)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x):
|
||||
x = x.permute(0, 2, 1)
|
||||
x = self.padding(x)
|
||||
x = self.relu(self.conv(x))
|
||||
return x.permute(0, 2, 1) # Permute back
|
||||
|
||||
|
||||
class FeatureAttentionLayer(nn.Module):
|
||||
"""Single Graph Feature/Spatial Attention Layer
|
||||
:param n_features: Number of input features/nodes
|
||||
:param window_size: length of the input sequence
|
||||
:param dropout: percentage of nodes to dropout
|
||||
:param alpha: negative slope used in the leaky rely activation function
|
||||
:param embed_dim: embedding dimension (output dimension of linear transformation)
|
||||
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
|
||||
:param use_bias: whether to include a bias term in the attention layer
|
||||
"""
|
||||
|
||||
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True,
|
||||
use_softmax=True):
|
||||
super(FeatureAttentionLayer, self).__init__()
|
||||
self.n_features = n_features
|
||||
self.window_size = window_size
|
||||
self.dropout = dropout
|
||||
self.embed_dim = embed_dim if embed_dim is not None else window_size
|
||||
self.use_gatv2 = use_gatv2
|
||||
self.num_nodes = n_features
|
||||
self.use_bias = use_bias
|
||||
self.use_softmax = use_softmax
|
||||
|
||||
# Because linear transformation is done after concatenation in GATv2
|
||||
if self.use_gatv2:
|
||||
self.embed_dim *= 2
|
||||
lin_input_dim = 2 * window_size
|
||||
a_input_dim = self.embed_dim
|
||||
else:
|
||||
lin_input_dim = window_size
|
||||
a_input_dim = 2 * self.embed_dim
|
||||
|
||||
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
|
||||
self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
|
||||
nn.init.xavier_uniform_(self.a.data, gain=1.414)
|
||||
|
||||
if self.use_bias:
|
||||
self.bias = nn.Parameter(torch.ones(n_features, n_features))
|
||||
|
||||
self.leakyrelu = nn.LeakyReLU(alpha)
|
||||
self.sigmoid = nn.Sigmoid()
|
||||
|
||||
def forward(self, x):
|
||||
# x shape (b, n, k): b - batch size, n - window size, k - number of features
|
||||
# For feature attention we represent a node as the values of a particular feature across all timestamps
|
||||
|
||||
x = x.permute(0, 2, 1)
|
||||
|
||||
# 'Dynamic' GAT attention
|
||||
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
|
||||
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
|
||||
if self.use_gatv2:
|
||||
a_input = self._make_attention_input(x) # (b, k, k, 2*window_size)
|
||||
a_input = self.leakyrelu(self.lin(a_input)) # (b, k, k, embed_dim)
|
||||
e = torch.matmul(a_input, self.a).squeeze(3) # (b, k, k, 1)
|
||||
|
||||
# Original GAT attention
|
||||
else:
|
||||
Wx = self.lin(x) # (b, k, k, embed_dim)
|
||||
a_input = self._make_attention_input(Wx) # (b, k, k, 2*embed_dim)
|
||||
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, k, k, 1)
|
||||
|
||||
if self.use_bias:
|
||||
e += self.bias
|
||||
|
||||
# Attention weights
|
||||
if self.use_softmax:
|
||||
e = torch.softmax(e, dim=2)
|
||||
attention = torch.dropout(e, self.dropout, train=self.training)
|
||||
|
||||
# Computing new node features using the attention
|
||||
h = self.sigmoid(torch.matmul(attention, x))
|
||||
|
||||
return h.permute(0, 2, 1)
|
||||
|
||||
def _make_attention_input(self, v):
|
||||
"""Preparing the feature attention mechanism.
|
||||
Creating matrix with all possible combinations of concatenations of node.
|
||||
Each node consists of all values of that node within the window
|
||||
v1 || v1,
|
||||
...
|
||||
v1 || vK,
|
||||
v2 || v1,
|
||||
...
|
||||
v2 || vK,
|
||||
...
|
||||
...
|
||||
vK || v1,
|
||||
...
|
||||
vK || vK,
|
||||
"""
|
||||
|
||||
K = self.num_nodes
|
||||
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
|
||||
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
|
||||
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2) # (b, K*K, 2*window_size)
|
||||
|
||||
if self.use_gatv2:
|
||||
return combined.view(v.size(0), K, K, 2 * self.window_size)
|
||||
else:
|
||||
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
|
||||
|
||||
|
||||
class TemporalAttentionLayer(nn.Module):
|
||||
"""Single Graph Temporal Attention Layer
|
||||
:param n_features: number of input features/nodes
|
||||
:param window_size: length of the input sequence
|
||||
:param dropout: percentage of nodes to dropout
|
||||
:param alpha: negative slope used in the leaky rely activation function
|
||||
:param embed_dim: embedding dimension (output dimension of linear transformation)
|
||||
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
|
||||
:param use_bias: whether to include a bias term in the attention layer
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True,
|
||||
use_softmax=True):
|
||||
super(TemporalAttentionLayer, self).__init__()
|
||||
self.n_features = n_features
|
||||
self.window_size = window_size
|
||||
self.dropout = dropout
|
||||
self.use_gatv2 = use_gatv2
|
||||
self.embed_dim = embed_dim if embed_dim is not None else n_features
|
||||
self.num_nodes = window_size
|
||||
self.use_bias = use_bias
|
||||
self.use_softmax = use_softmax
|
||||
|
||||
# Because linear transformation is performed after concatenation in GATv2
|
||||
if self.use_gatv2:
|
||||
self.embed_dim *= 2
|
||||
lin_input_dim = 2 * n_features
|
||||
a_input_dim = self.embed_dim
|
||||
else:
|
||||
lin_input_dim = n_features
|
||||
a_input_dim = 2 * self.embed_dim
|
||||
|
||||
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
|
||||
self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
|
||||
nn.init.xavier_uniform_(self.a.data, gain=1.414)
|
||||
|
||||
if self.use_bias:
|
||||
self.bias = nn.Parameter(torch.ones(window_size, window_size))
|
||||
|
||||
self.leakyrelu = nn.LeakyReLU(alpha)
|
||||
self.sigmoid = nn.Sigmoid()
|
||||
|
||||
def forward(self, x):
|
||||
# x shape (b, n, k): b - batch size, n - window size, k - number of features
|
||||
# For temporal attention a node is represented as all feature values at a specific timestamp
|
||||
|
||||
# 'Dynamic' GAT attention
|
||||
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
|
||||
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
|
||||
if self.use_gatv2:
|
||||
a_input = self._make_attention_input(x) # (b, n, n, 2*n_features)
|
||||
a_input = self.leakyrelu(self.lin(a_input)) # (b, n, n, embed_dim)
|
||||
e = torch.matmul(a_input, self.a).squeeze(3) # (b, n, n, 1)
|
||||
|
||||
# Original GAT attention
|
||||
else:
|
||||
Wx = self.lin(x) # (b, n, n, embed_dim)
|
||||
a_input = self._make_attention_input(Wx) # (b, n, n, 2*embed_dim)
|
||||
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, n, n, 1)
|
||||
|
||||
if self.use_bias:
|
||||
e += self.bias # (b, n, n, 1)
|
||||
|
||||
# Attention weights
|
||||
if self.use_softmax:
|
||||
e = torch.softmax(e, dim=2)
|
||||
attention = torch.dropout(e, self.dropout, train=self.training)
|
||||
|
||||
h = self.sigmoid(torch.matmul(attention, x)) # (b, n, k)
|
||||
|
||||
return h
|
||||
|
||||
def _make_attention_input(self, v):
|
||||
"""Preparing the temporal attention mechanism.
|
||||
Creating matrix with all possible combinations of concatenations of node values:
|
||||
(v1, v2..)_t1 || (v1, v2..)_t1
|
||||
(v1, v2..)_t1 || (v1, v2..)_t2
|
||||
|
||||
...
|
||||
...
|
||||
|
||||
(v1, v2..)_tn || (v1, v2..)_t1
|
||||
(v1, v2..)_tn || (v1, v2..)_t2
|
||||
|
||||
"""
|
||||
|
||||
K = self.num_nodes
|
||||
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
|
||||
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
|
||||
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2)
|
||||
|
||||
if self.use_gatv2:
|
||||
return combined.view(v.size(0), K, K, 2 * self.n_features)
|
||||
else:
|
||||
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
|
||||
|
||||
|
||||
class FullAttention(nn.Module):
|
||||
def __init__(self, mask_flag=True, scale=None, attention_dropout=0.1, output_attention=False):
|
||||
super(FullAttention, self).__init__()
|
||||
self.scale = scale
|
||||
self.mask_flag = mask_flag
|
||||
self.output_attention = output_attention
|
||||
self.dropout = nn.Dropout(attention_dropout)
|
||||
self.relu_q = nn.ReLU()
|
||||
self.relu_k = nn.ReLU()
|
||||
|
||||
@staticmethod
|
||||
def TriangularCausalMask(B, L, S, device='cpu'):
|
||||
mask_shape = [B, 1, L, S]
|
||||
with torch.no_grad():
|
||||
mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1)
|
||||
return mask.to(device)
|
||||
|
||||
def forward(self, queries, keys, values, attn_mask):
|
||||
B, L, H, E = queries.shape
|
||||
_, S, _, D = values.shape
|
||||
scale = self.scale or 1. / sqrt(E) # scale相对于取多少比例,取前1/根号n
|
||||
|
||||
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
|
||||
if self.mask_flag:
|
||||
if attn_mask is None:
|
||||
attn_mask = self.TriangularCausalMask(B, L, S, device=queries.device)
|
||||
|
||||
scores.masked_fill_(attn_mask, 0)
|
||||
|
||||
A = self.dropout(torch.softmax(scale * scores, dim=-1))
|
||||
V = torch.einsum("bhls,bshd->blhd", A, values)
|
||||
|
||||
# queries = self.relu_q(queries)
|
||||
# keys = self.relu_k(keys)
|
||||
# KV = torch.einsum("blhe,bshe->bhls", keys, values)
|
||||
# A = self.dropout(scale * KV)
|
||||
# V = torch.einsum("bshd,bhls->blhd", queries, A)
|
||||
|
||||
if self.output_attention:
|
||||
return (V.contiguous(), A)
|
||||
else:
|
||||
return (V.contiguous(), None)
|
||||
|
||||
|
||||
class ProbAttention(nn.Module):
|
||||
def __init__(self, mask_flag=True, factor=2, scale=None, attention_dropout=0.1, output_attention=False):
|
||||
super(ProbAttention, self).__init__()
|
||||
self.factor = factor
|
||||
self.scale = scale
|
||||
self.mask_flag = mask_flag
|
||||
self.output_attention = output_attention
|
||||
|
||||
@staticmethod
|
||||
def ProbMask(B, H, D, index, scores, device='cpu'):
|
||||
_mask = torch.ones(D, scores.shape[-2], dtype=torch.bool).triu(1)
|
||||
_mask_ex = _mask[None, None, :].expand(B, H, D, scores.shape[-2])
|
||||
indicator = _mask_ex.transpose(-2, -1)[torch.arange(B)[:, None, None],
|
||||
torch.arange(H)[None, :, None],
|
||||
index, :].transpose(-2, -1)
|
||||
mask = indicator.view(scores.shape)
|
||||
return mask.to(device)
|
||||
|
||||
def _prob_KV(self, K, V, sample_v, n_top): # n_top: c*ln(L_q)
|
||||
# Q [B, H, L, D]
|
||||
B, H, L, E_V = V.shape
|
||||
_, _, _, E_K = K.shape
|
||||
|
||||
# calculate the sampled K_V
|
||||
|
||||
V_expand = V.transpose(-2, -1).unsqueeze(-2).expand(B, H, E_V, E_K, L)
|
||||
index_sample = torch.randint(E_V, (E_K, sample_v)) # real U = U_part(factor*ln(L_k))*L_q
|
||||
V_sample = V_expand[:, :, torch.arange(E_V).unsqueeze(1), index_sample, :]
|
||||
K_V_sample = torch.matmul(K.transpose(-2, -1).unsqueeze(-2), V_sample.transpose(-2, -1)).squeeze()
|
||||
|
||||
# find the Top_k query with sparisty measurement
|
||||
M = K_V_sample.max(-1)[0] - torch.div(K_V_sample.sum(-1), E_V)
|
||||
M_top = M.topk(n_top, sorted=False)[1]
|
||||
|
||||
# use the reduced Q to calculate Q_K
|
||||
V_reduce = V.transpose(-2, -1)[torch.arange(B)[:, None, None],
|
||||
torch.arange(H)[None, :, None],
|
||||
M_top, :].transpose(-2, -1) # factor*ln(L_q)
|
||||
K_V = torch.matmul(K.transpose(-2, -1), V_reduce) # factor*ln(L_q)*L_k
|
||||
#
|
||||
return K_V, M_top
|
||||
|
||||
def _get_initial_context(self, V, L_Q):
|
||||
B, H, L_V, D = V.shape
|
||||
if not self.mask_flag:
|
||||
# V_sum = V.sum(dim=-2)
|
||||
V_sum = V.mean(dim=-2)
|
||||
contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
|
||||
else: # use mask
|
||||
assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
|
||||
contex = V.cumsum(dim=-2)
|
||||
return contex
|
||||
|
||||
def _update_context(self, context_in, Q, scores, index, D_K, attn_mask):
|
||||
B, H, L, D_Q = Q.shape
|
||||
|
||||
if self.mask_flag:
|
||||
attn_mask = self.ProbMask(B, H, D_K, index, scores, device=Q.device)
|
||||
scores.masked_fill_(attn_mask, -np.inf)
|
||||
|
||||
attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
|
||||
|
||||
context_in.transpose(-2, -1)[torch.arange(B)[:, None, None],
|
||||
torch.arange(H)[None, :, None],
|
||||
index, :] = torch.matmul(Q, attn).type_as(context_in).transpose(-2, -1)
|
||||
if self.output_attention:
|
||||
attns = (torch.ones([B, H, D_K, D_K]) / D_K).type_as(attn)
|
||||
attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
|
||||
return (context_in, attns)
|
||||
else:
|
||||
return (context_in, None)
|
||||
|
||||
def forward(self, queries, keys, values, attn_mask):
|
||||
# B, L_Q, H, D = queries.shape
|
||||
# _, L_K, _, _ = keys.shape
|
||||
|
||||
B, L, H, D_K = keys.shape
|
||||
_, _, _, D_V = values.shape
|
||||
|
||||
queries = queries.transpose(2, 1)
|
||||
keys = keys.transpose(2, 1)
|
||||
values = values.transpose(2, 1)
|
||||
|
||||
U_part = self.factor * np.ceil(np.log(D_V)).astype('int').item() # c*ln(L_k)
|
||||
u = self.factor * np.ceil(np.log(D_K)).astype('int').item() # c*ln(L_q)
|
||||
|
||||
U_part = U_part if U_part < D_V else D_V
|
||||
u = u if u < D_K else D_K
|
||||
|
||||
scores_top, index = self._prob_KV(keys, values, sample_v=U_part, n_top=u)
|
||||
|
||||
# add scale factor
|
||||
scale = self.scale or 1. / sqrt(D_K)
|
||||
if scale is not None:
|
||||
scores_top = scores_top * scale
|
||||
# get the context
|
||||
context = self._get_initial_context(queries, L)
|
||||
# update the context with selected top_k queries
|
||||
context, attn = self._update_context(context, queries, scores_top, index, D_K, attn_mask)
|
||||
|
||||
return context.contiguous(), attn
|
||||
|
||||
|
||||
class AttentionBlock(nn.Module):
|
||||
def __init__(self, d_model, n_model, n_heads=8, d_keys=None, d_values=None):
|
||||
super(AttentionBlock, self).__init__()
|
||||
|
||||
d_keys = d_keys or (d_model // n_heads)
|
||||
d_values = d_values or (d_model // n_heads)
|
||||
self.inner_attention = FullAttention()
|
||||
# self.inner_attention = ProbAttention(device=device)
|
||||
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
|
||||
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
|
||||
self.value_projection = nn.Linear(d_model, d_values * n_heads)
|
||||
self.out_projection = nn.Linear(d_values * n_heads, d_model)
|
||||
self.n_heads = n_heads
|
||||
self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
|
||||
|
||||
def forward(self, queries, keys, values, attn_mask):
|
||||
'''
|
||||
Q: [batch_size, len_q, d_k]
|
||||
K: [batch_size, len_k, d_k]
|
||||
V: [batch_size, len_v(=len_k), d_v]
|
||||
attn_mask: [batch_size, seq_len, seq_len]
|
||||
'''
|
||||
batch_size, len_q, _ = queries.shape
|
||||
_, len_k, _ = keys.shape
|
||||
|
||||
queries = self.query_projection(queries).view(batch_size, len_q, self.n_heads, -1)
|
||||
keys = self.key_projection(keys).view(batch_size, len_k, self.n_heads, -1)
|
||||
values = self.value_projection(values).view(batch_size, len_k, self.n_heads, -1)
|
||||
|
||||
out, attn = self.inner_attention(
|
||||
queries,
|
||||
keys,
|
||||
values,
|
||||
attn_mask
|
||||
)
|
||||
out = out.view(batch_size, len_q, -1)
|
||||
out = self.out_projection(out)
|
||||
out = self.layer_norm(out)
|
||||
return out, attn
|
||||
|
||||
|
||||
class GRULayer(nn.Module):
|
||||
"""Gated Recurrent Unit (GRU) Layer
|
||||
:param in_dim: number of input features
|
||||
:param hid_dim: hidden size of the GRU
|
||||
:param n_layers: number of layers in GRU
|
||||
:param dropout: dropout rate
|
||||
"""
|
||||
|
||||
def __init__(self, in_dim, hid_dim, n_layers, dropout):
|
||||
super(GRULayer, self).__init__()
|
||||
self.hid_dim = hid_dim
|
||||
self.n_layers = n_layers
|
||||
self.dropout = 0.0 if n_layers == 1 else dropout
|
||||
self.gru = nn.GRU(in_dim, hid_dim, num_layers=n_layers, batch_first=True, dropout=self.dropout)
|
||||
|
||||
def forward(self, x):
|
||||
out, h = self.gru(x)
|
||||
out, h = out[-1, :, :], h[-1, :, :] # Extracting from last layer
|
||||
return out, h
|
||||
|
||||
|
||||
class RNNDecoder(nn.Module):
|
||||
"""GRU-based Decoder network that converts latent vector into output
|
||||
:param in_dim: number of input features
|
||||
:param n_layers: number of layers in RNN
|
||||
:param hid_dim: hidden size of the RNN
|
||||
:param dropout: dropout rate
|
||||
"""
|
||||
|
||||
def __init__(self, in_dim, hid_dim, n_layers, dropout):
|
||||
super(RNNDecoder, self).__init__()
|
||||
self.in_dim = in_dim
|
||||
self.dropout = 0.0 if n_layers == 1 else dropout
|
||||
self.rnn = nn.GRU(in_dim, hid_dim, n_layers, batch_first=True, dropout=self.dropout)
|
||||
|
||||
def forward(self, x):
|
||||
decoder_out, _ = self.rnn(x)
|
||||
return decoder_out
|
||||
|
||||
|
||||
class ReconstructionModel(nn.Module):
|
||||
"""Reconstruction Model
|
||||
:param window_size: length of the input sequence
|
||||
:param in_dim: number of input features
|
||||
:param n_layers: number of layers in RNN
|
||||
:param hid_dim: hidden size of the RNN
|
||||
:param in_dim: number of output features
|
||||
:param dropout: dropout rate
|
||||
"""
|
||||
|
||||
def __init__(self, window_size, in_dim, hid_dim, out_dim, n_layers, dropout):
|
||||
super(ReconstructionModel, self).__init__()
|
||||
self.window_size = window_size
|
||||
self.decoder = RNNDecoder(in_dim, hid_dim, n_layers, dropout)
|
||||
self.fc = nn.Linear(hid_dim, out_dim)
|
||||
|
||||
def forward(self, x):
|
||||
# x will be last hidden state of the GRU layer
|
||||
h_end = x
|
||||
h_end_rep = h_end.repeat_interleave(self.window_size, dim=1).view(x.size(0), self.window_size, -1)
|
||||
|
||||
decoder_out = self.decoder(h_end_rep)
|
||||
out = self.fc(decoder_out)
|
||||
return out
|
||||
|
||||
|
||||
class Forecasting_Model(nn.Module):
|
||||
"""Forecasting model (fully-connected network)
|
||||
:param in_dim: number of input features
|
||||
:param hid_dim: hidden size of the FC network
|
||||
:param out_dim: number of output features
|
||||
:param n_layers: number of FC layers
|
||||
:param dropout: dropout rate
|
||||
"""
|
||||
|
||||
def __init__(self, in_dim, hid_dim, out_dim, n_layers, dropout):
|
||||
super(Forecasting_Model, self).__init__()
|
||||
layers = [nn.Linear(in_dim, hid_dim)]
|
||||
for _ in range(n_layers - 1):
|
||||
layers.append(nn.Linear(hid_dim, hid_dim))
|
||||
|
||||
layers.append(nn.Linear(hid_dim, out_dim))
|
||||
|
||||
self.layers = nn.ModuleList(layers)
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x):
|
||||
for i in range(len(self.layers) - 1):
|
||||
x = self.relu(self.layers[i](x))
|
||||
x = self.dropout(x)
|
||||
return self.layers[-1](x)
|
||||
|
||||
|
||||
class Model(nn.Module):
    """MTAD_GAT model class.

    :param n_features: Number of input features
    :param window_size: Length of the input sequence
    :param out_dim: Number of features to output
    :param kernel_size: size of kernel to use in the 1-D convolution
    :param feat_gat_embed_dim: embedding dimension (output dimension of linear transformation)
           in feat-oriented GAT layer
    :param time_gat_embed_dim: embedding dimension (output dimension of linear transformation)
           in time-oriented GAT layer
    :param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
    :param gru_n_layers: number of layers in the GRU layer
    :param gru_hid_dim: hidden dimension in the GRU layer
    :param forecast_n_layers: number of layers in the FC-based Forecasting Model
    :param forecast_hid_dim: hidden dimension in the FC-based Forecasting Model
    :param recon_n_layers: number of layers in the GRU-based Reconstruction Model
    :param recon_hid_dim: hidden dimension in the GRU-based Reconstruction Model
    :param dropout: dropout rate
    :param alpha: negative slope used in the LeakyReLU activation function

    (In this tool these hyperparameters are fixed inside __init__ and derived from `customs`
    and the dataloader rather than passed individually.)
    """

    def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
        super(Model, self).__init__()
        n_features = dataloader.dataset.train_inputs.shape[-1]
        window_size = int(customs["input_size"])
        out_dim = n_features
        kernel_size = 7
        feat_gat_embed_dim = None
        time_gat_embed_dim = None
        use_gatv2 = True
        gru_n_layers = 1
        gru_hid_dim = 150
        forecast_n_layers = 1
        forecast_hid_dim = 150
        recon_n_layers = 1
        recon_hid_dim = 150
        dropout = 0.2
        alpha = 0.2
        # optimize selects the attention-based encoder/decoder (True) or the original
        # GRU + RNN-reconstruction path (False). It defaults to True and can be overridden
        # via customs so that the benchmark in __main__ below can exercise both paths.
        optimize = customs.get("optimize", True)
        if isinstance(optimize, str):  # values coming from config.ini arrive as strings
            optimize = optimize.lower() == "true"

        self.name = "MtadGatAtt"
        self.optimize = optimize
        use_softmax = not optimize

        self.conv = ConvLayer(n_features, kernel_size)
        self.feature_gat = FeatureAttentionLayer(
            n_features, window_size, dropout, alpha, feat_gat_embed_dim, use_gatv2, use_softmax=use_softmax)
        self.temporal_gat = TemporalAttentionLayer(n_features, window_size, dropout, alpha, time_gat_embed_dim,
                                                   use_gatv2, use_softmax=use_softmax)
        self.forecasting_model = Forecasting_Model(
            gru_hid_dim, forecast_hid_dim, out_dim, forecast_n_layers, dropout)
        if optimize:
            self.encode = AttentionBlock(3 * n_features, window_size)
            self.encode_feature = nn.Linear(3 * n_features * window_size, gru_hid_dim)
            self.decode_feature = nn.Linear(gru_hid_dim, n_features * window_size)
            self.decode = AttentionBlock(n_features, window_size)
        else:
            self.gru = GRULayer(3 * n_features, gru_hid_dim, gru_n_layers, dropout)
            self.recon_model = ReconstructionModel(window_size, gru_hid_dim, recon_hid_dim, out_dim, recon_n_layers,
                                                   dropout)

    def forward(self, x):
        x = self.conv(x)
        h_feat = self.feature_gat(x)
        h_temp = self.temporal_gat(x)
        h_cat = torch.cat([x, h_feat, h_temp], dim=2)  # (b, n, 3k)

        if self.optimize:
            h_end, _ = self.encode(h_cat, h_cat, h_cat, None)
            h_end = self.encode_feature(h_end.reshape(h_end.size(0), -1))
        else:
            _, h_end = self.gru(h_cat)
            h_end = h_end.view(x.shape[0], -1)  # Hidden state for last timestamp

        predictions = self.forecasting_model(h_end)

        if self.optimize:
            h_end = self.decode_feature(h_end)
            h_end = h_end.reshape(x.shape[0], x.shape[1], x.shape[2])
            recons, _ = self.decode(h_end, h_end, h_end, None)
        else:
            recons = self.recon_model(h_end)

        return predictions, recons

    def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
        preds, recons = self.forward(x)

        if preds.ndim == 3:
            preds = preds.squeeze(1)
        if y_true.ndim == 3:
            y_true = y_true.squeeze(1)
        forecast_criterion = nn.MSELoss()
        recon_criterion = nn.MSELoss()

        forecast_loss = torch.sqrt(forecast_criterion(y_true, preds))
        recon_loss = torch.sqrt(recon_criterion(x, recons))

        loss = forecast_loss + recon_loss
        loss.backward()
        return loss.item()

    def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
        preds, recons = self.forward(x)
        score = F.pairwise_distance(recons.reshape(recons.size(0), -1), x.reshape(x.size(0), -1)) \
            + F.pairwise_distance(y_true.reshape(y_true.size(0), -1), preds.reshape(preds.size(0), -1))
        return score, None


if __name__ == "__main__":
    # Throughput benchmark: compares the GRU-based variant (optimize=False) with the
    # attention-based variant (optimize=True) over several window lengths.
    # NOTE: the original calls here used an older constructor signature; the stub dataset
    # below only exists to satisfy the current customs/dataloader constructor.
    from tqdm import tqdm
    import time

    class _StubDataset:
        def __init__(self, n_features, window_size):
            self.train_inputs = torch.ones((1, window_size, n_features))

    class _StubLoader:
        def __init__(self, dataset):
            self.dataset = dataset

    epoch = 10000
    batch_size = 1
    n_features = 52
    # device = 'cuda:1' if torch.cuda.is_available() else 'cpu'
    device = 'cpu'
    input_len_list = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300]
    for input_len in input_len_list:
        loader = _StubLoader(_StubDataset(n_features, input_len))
        a = torch.ones((batch_size, input_len, n_features)).to(device)

        model = Model({"input_size": input_len, "optimize": False}, loader).to(device)
        start = time.time()
        for i in tqdm(range(epoch)):
            model(a)
        end = time.time()
        speed1 = batch_size * epoch / (end - start)

        model = Model({"input_size": input_len, "optimize": True}, loader).to(device)
        start = time.time()
        for i in tqdm(range(epoch)):
            model(a)
        end = time.time()
        speed2 = batch_size * epoch / (end - start)
        print(input_len, (speed2 - speed1) / speed1, speed1, speed2)
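To make the anomaly score returned by Model.detection above concrete, here is a small self-contained sketch with toy shapes and random tensors (not part of the repository): F.pairwise_distance yields one L2 distance per row of the flattened tensors, so the result is one combined reconstruction-plus-forecast score per window.

import torch
import torch.nn.functional as F

batch, window, feats = 4, 100, 51
x = torch.randn(batch, window, feats)        # input windows
recons = torch.randn(batch, window, feats)   # hypothetical reconstructions
y_true = torch.randn(batch, 1, feats)        # next-step ground truth
preds = torch.randn(batch, 1, feats)         # hypothetical forecasts

# One L2 distance per sample over the flattened window, plus one over the forecast step.
score = F.pairwise_distance(recons.reshape(batch, -1), x.reshape(batch, -1)) \
    + F.pairwise_distance(y_true.reshape(batch, -1), preds.reshape(batch, -1))
print(score.shape)  # torch.Size([4]) -- one anomaly score per window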
3
method/__init__.py
Normal file
@@ -0,0 +1,3 @@
# Importing each model module makes it available as method.<name>; the top-level name
# `Model` ends up bound to the last import.
from .MtadGat import Model
from .MtadGatAtt import Model
from .AnomalyTransformer import Model
50
method/template.py
Normal file
@@ -0,0 +1,50 @@
import torch.nn as nn
import torch.utils.data as tud
import torch


class Model(nn.Module):
    def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
        """
        :param customs: custom parameters, taken from the [CustomParameters] section of config.ini.
        :param dataloader: the dataloader built on the initialized dataset. In a custom preprocessing
            file you can add extra attributes or methods and expose them to the model here.
            For example, if the model needs the number of feature dimensions, it can read
            n_features = dataloader.dataset.train_inputs.shape[-1],
            or the preprocessing MyDataset class can define self.n_features = self.train_inputs.shape[-1]
            and the model can read n_features = dataloader.dataset.n_features.
        """
        super(Model, self).__init__()

    def forward(self, x):
        """
        :param x: model input; in this tool it is the first of the three variables returned by
            the __getitem__ method of the MyDataset class.
        :return: model output; the format is up to you
        """
        return None

    def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
        """
        Compute the loss. Note: computing the loss with libraries other than torch breaks the
        gradient graph, so please use torch operations only.
        :param x: input data
        :param y_true: ground-truth output data
        :param epoch: current epoch number
        :param device: device, cpu or cuda
        :return: loss value
        """
        y_pred = self.forward(x)  # model output
        loss = torch.Tensor([1])  # placeholder example, please replace with a real differentiable loss
        loss.backward()
        return loss.item()

    def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
        """
        Detection method. It may output either anomaly scores or concrete labels.
        If scores are returned, a threshold is selected automatically afterwards and points above it
        are labelled as anomalies; if labels are returned, they are evaluated directly.
        :param x: input data
        :param y_true: ground-truth output data
        :param epoch: current epoch number
        :param device: device, cpu or cuda
        :return: score, label. If you output anomaly scores, return score and set label to None;
            if you output labels, return label and set score to None.
            score is a torch Tensor of shape [batch_size]; label is a torch IntTensor of shape [batch_size].
        """
        y_pred = self.forward(x)  # model output
        return None, None
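To make the contract described in the template above concrete, here is a hedged sketch (not part of the repository) of a minimal model that fills it in: a single linear layer forecasting the next point from the window mean. In an actual method file the class would be named Model and live under ./method; loss returns a scalar after calling backward, and detection returns one anomaly score per window with label left as None, as the docstrings require.

import torch
import torch.nn as nn
import torch.utils.data as tud


class LinearForecastModel(nn.Module):
    """Toy example following the template interface: forecast the next point from the window mean."""

    def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
        super(LinearForecastModel, self).__init__()
        self.name = "LinearForecast"
        n_features = dataloader.dataset.train_inputs.shape[-1]  # feature count from the dataset
        self.fc = nn.Linear(n_features, n_features)

    def forward(self, x):
        # x: [batch, window, features] -> predict the next point from the mean over the window
        return self.fc(x.mean(dim=1, keepdim=True))  # [batch, 1, features]

    def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
        y_pred = self.forward(x)
        loss = nn.functional.mse_loss(y_pred, y_true)
        loss.backward()
        return loss.item()

    def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
        y_pred = self.forward(x)
        score = (y_pred - y_true).pow(2).mean(dim=(1, 2))  # one anomaly score per window
        return score, None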
1
preprocess/__init__.py
Normal file
@@ -0,0 +1 @@
from .standardization import MyDataset
119
preprocess/standardization.py
Normal file
@@ -0,0 +1,119 @@
from torch.utils.data import Dataset
from torch import float32, Tensor
from numpy import array, where


class MyDataset(Dataset):
    def __init__(self, name: str, train_path: str = None, test_path: str = None, input_size: int = 1,
                 output_size: int = 1, step: int = 1, mode: str = 'train', time_index: bool = True,
                 del_column_name: bool = True):
        """
        Converts csv files into tensors in batch.
        :param name: dataset name.
        :param train_path: path to the training set.
        :param test_path: path to the test set.
        :param input_size: length of the input window.
        :param output_size: length of the output window.
        :param step: stride of the sliding window.
        :param mode: 'train' or 'test', selects whether training or test data is served.
        :param time_index: True if the first column is a timestamp, False otherwise.
        :param del_column_name: use True when the first row contains column names.
        """
        self.name = name
        self.input_size = input_size
        self.output_size = output_size
        self.del_column_name = del_column_name
        self.step = step
        self.mode = mode
        self.time_index = time_index
        self.train_inputs, self.train_labels, self.train_outputs, self.test_inputs, self.test_labels, self.test_outputs\
            = self.parse_data(train_path, test_path)
        self.train_inputs = Tensor(self.train_inputs).to(float32) if self.train_inputs is not None else None
        self.train_labels = Tensor(self.train_labels).to(float32) if self.train_labels is not None else None
        self.train_outputs = Tensor(self.train_outputs).to(float32) if self.train_outputs is not None else None
        self.test_inputs = Tensor(self.test_inputs).to(float32) if self.test_inputs is not None else None
        self.test_labels = Tensor(self.test_labels).to(float32) if self.test_labels is not None else None
        self.test_outputs = Tensor(self.test_outputs).to(float32) if self.test_outputs is not None else None

    def parse_data(self, train_path: str = None, test_path: str = None):
        if train_path is None and test_path is None:
            raise ValueError("train_path is None and test_path is None.")

        mean = None
        deviation = None
        train_data_input, train_label, train_data_output = None, None, None
        test_data_input, test_label, test_data_output = None, None, None

        # Read the training set
        if train_path:
            train_data = []
            train_label = []
            with open(train_path, 'r', encoding='utf8') as f:
                if self.del_column_name is True:
                    data = f.readlines()[1:]
                else:
                    data = f.readlines()
                train_data.extend([list(map(float, line.strip().split(','))) for line in data])
                train_label.extend([0 for _ in data])
            train_np = array(train_data)
            if self.time_index:
                train_np[:, 0] = train_np[:, 0] % 86400
            mean = train_np.mean(axis=0)  # column means
            deviation = train_np.std(axis=0)  # column standard deviations
            deviation = where(deviation != 0, deviation, 1)
            train_np = (train_np - mean) / deviation  # standardization
            train_data = train_np.tolist()
            train_data_input, train_data_output, train_label = self.cut_data(train_data, train_label)

        # Read the test set
        if test_path:
            test_data = []
            test_label = []
            with open(test_path, 'r', encoding='utf8') as f:
                if self.del_column_name is True:
                    data = f.readlines()[1:]
                else:
                    data = f.readlines()
                test_data.extend([list(map(float, line.strip().split(',')))[:-1] for line in data])
                test_label.extend([int(line.strip().split(',')[-1]) for line in data])
            test_np = array(test_data)
            if self.time_index:
                test_np[:, 0] = test_np[:, 0] % 86400
            # Test data is standardized with the statistics of the training set
            # mean = test_np.mean(axis=0)
            # deviation = test_np.std(axis=0)
            # deviation = where(deviation != 0, deviation, 1)
            test_np = (test_np - mean) / deviation  # standardization
            test_data = test_np.tolist()
            # Automatically decide whether the labels need to be flipped. Anomalies are assumed to be
            # labelled 1, so if more than half of the labels are 1 the labels are inverted.
            if sum(test_label) > 0.5 * len(test_label):
                test_label = (1 - array(test_label)).tolist()
            test_data_input, test_data_output, test_label = self.cut_data(test_data, test_label)

        return train_data_input, train_label, train_data_output, test_data_input, test_label, test_data_output

    def cut_data(self, data: [[float]], label: [int]):
        n = 0
        input_data, output_data, anomaly_label = [], [], []
        while n + self.input_size + self.output_size <= len(data):
            input_data.append(data[n: n + self.input_size])
            output_data.append(data[n + self.input_size: n + self.input_size + self.output_size])
            anomaly_label.append([max(label[n + self.input_size: n + self.input_size + self.output_size])])
            n = n + self.step
        return input_data.copy(), output_data.copy(), anomaly_label.copy()

    def __len__(self):
        if self.mode == 'train':
            return self.train_inputs.shape[0]
        elif self.mode == 'test':
            return self.test_inputs.shape[0]

    def __getitem__(self, idx):
        if self.mode == 'train':
            return self.train_inputs[idx], self.train_labels[idx], self.train_outputs[idx]
        elif self.mode == 'test':
            return self.test_inputs[idx], self.test_labels[idx], self.test_outputs[idx]


if __name__ == "__main__":
    # The first positional argument is the dataset name; the original call passed the train path here.
    app = MyDataset('SWAT', train_path='../dataset/SWAT/train.csv', test_path='../dataset/SWAT/test.csv',
                    input_size=3)
    print(app)
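A quick sanity check on the sliding-window logic in cut_data above (synthetic one-feature series, not the SWAT csv): with T points, window length input_size, horizon output_size and stride step, the number of windows is floor((T - input_size - output_size) / step) + 1.

T, input_size, output_size, step = 10, 3, 1, 2
data = [[float(t)] for t in range(T)]   # one feature, values 0..9
label = [0] * T

windows = []
n = 0
while n + input_size + output_size <= T:
    windows.append((data[n:n + input_size],                              # model input
                    data[n + input_size:n + input_size + output_size]))  # forecast target
    n += step
print(len(windows))  # 4 windows, starting at t = 0, 2, 4, 6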
61
preprocess/template.py
Normal file
@@ -0,0 +1,61 @@
"""
|
||||
模板文件,有自定义预处理方法可以通过编辑本文件实现数据集预处理。
|
||||
编辑完成以后,请将文件名修改,写入config.ini,并在同级目录下init文件添加本文件
|
||||
"""
|
||||
|
||||
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
|
||||
class DataSet(Dataset):
|
||||
def __init__(self, train_path: str = None, test_path: str = None, input_size: int = 1, output_size: int = 1,
|
||||
step: int = 1, mode: str = 'train', del_time: bool = True, del_column_name: bool = True,
|
||||
reverse_label: bool = True):
|
||||
"""
|
||||
可以将csv文件批量转成tensor
|
||||
|
||||
注意:必须包含以下变量或方法。
|
||||
变量:self.train_inputs、self.train_labels、self.train_outputs
|
||||
self.test_inputs、self.test_labels、self.test_outputs、self.mode
|
||||
方法:__len__()、__getitem__()
|
||||
|
||||
:param train_path: str类型。训练数据集路径。
|
||||
:param test_path: str类型。测试数据集路径。
|
||||
:param input_size: int类型。输入数据长度。
|
||||
:param output_size: int类型。输出数据长度。
|
||||
:param step: int类型。截取数据的窗口移动间隔。
|
||||
:param mode: str类型。train或者test,用于指示使用训练集数据还是测试集数据。
|
||||
:param del_time: bool类型。True为删除时间戳列,False为不删除。
|
||||
:param del_column_name: bool类型。文件中第一行为列名时,使用True。
|
||||
:param reverse_label: bool类型。转化标签,0和1互换。标签统一采用正常为0异常为1的格式,若原文件中不符和该规定,使用True。
|
||||
"""
|
||||
|
||||
self.mode = mode
|
||||
self.train_inputs = None # 训练时的输入数据,Tensor格式,尺寸为[N,L,D]。N表示训练数据的数量,L表示每条数据的长度(由多少个时间点组成的数据),D表示数据维度数量
|
||||
self.train_labels = None # 训练时的数据标签,Tensor格式,尺寸为[N,1]。
|
||||
self.train_outputs = None # 训练时的输出数据,Tensor格式,尺寸为[N,L,D]。
|
||||
self.test_inputs = None # 测试时的输入数据,Tensor格式,尺寸为[N,L,D]。
|
||||
self.test_labels = None # 测试时的数据标签,Tensor格式,尺寸为[N,1]。
|
||||
self.test_outputs = None # 测试时的输出数据,Tensor格式,尺寸为[N,L,D]。
|
||||
|
||||
def __len__(self):
|
||||
"""
|
||||
提供数据集长度
|
||||
:return: 测试集或者训练集数据长度N
|
||||
"""
|
||||
if self.mode == 'train':
|
||||
return self.train_inputs.shape[0]
|
||||
elif self.mode == 'test':
|
||||
return self.test_inputs.shape[0]
|
||||
|
||||
def __getitem__(self, idx):
|
||||
"""
|
||||
获取数据
|
||||
:param idx: 数据序号
|
||||
:return: 对应的输入数据、标签、输出数据
|
||||
"""
|
||||
if self.mode == 'train':
|
||||
return self.train_inputs[idx], self.train_labels[idx], self.train_outputs[idx]
|
||||
elif self.mode == 'test':
|
||||
return self.test_inputs[idx], self.test_labels[idx], self.test_outputs[idx]
|
||||
|
||||
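As with the model template, here is a hedged sketch (not part of the repository) of this preprocessing template filled in with a synthetic two-feature signal in place of csv files; it keeps the required attributes (train/test inputs, labels, outputs, mode) and the __len__/__getitem__ contract.

from torch.utils.data import Dataset
import torch


class MyDataset(Dataset):
    """Toy example filling in the template with a synthetic series instead of csv files."""

    def __init__(self, input_size: int = 100, output_size: int = 1, step: int = 1, mode: str = 'train'):
        self.mode = mode
        t = torch.arange(0, 2000, dtype=torch.float32)
        series = torch.stack([torch.sin(t / 50), torch.cos(t / 30)], dim=1)  # [T, 2] synthetic signal
        inputs, outputs, labels = [], [], []
        n = 0
        while n + input_size + output_size <= series.shape[0]:
            inputs.append(series[n:n + input_size])
            outputs.append(series[n + input_size:n + input_size + output_size])
            labels.append(torch.zeros(1))          # no anomalies in the synthetic signal
            n += step
        self.train_inputs = self.test_inputs = torch.stack(inputs)     # [N, L, D]
        self.train_outputs = self.test_outputs = torch.stack(outputs)  # [N, output_size, D]
        self.train_labels = self.test_labels = torch.stack(labels)     # [N, 1]

    def __len__(self):
        return self.train_inputs.shape[0] if self.mode == 'train' else self.test_inputs.shape[0]

    def __getitem__(self, idx):
        if self.mode == 'train':
            return self.train_inputs[idx], self.train_labels[idx], self.train_outputs[idx]
        return self.test_inputs[idx], self.test_labels[idx], self.test_outputs[idx]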
0
records/2023-04-06_14-37-45/log
Normal file
449890
records/2023-04-10_10-30-27/detection_result/MtadGatAtt_SWAT.csv
Normal file
File diff suppressed because it is too large
8
requirements.txt
Normal file
@@ -0,0 +1,8 @@
eventlet==0.33.3
gevent==22.10.2
loguru==0.6.0
numpy==1.21.2
pandas==1.4.1
scikit_learn==1.2.2
torch==1.10.2
tqdm==4.63.1
318
utils.py
Normal file
@@ -0,0 +1,318 @@
import os
import numpy
import torch.nn as nn
import torch
import torch.utils.data as tud
import preprocess
import evaluation
from loguru import logger
import time
from tqdm import tqdm


def create_dataloader(dataset_name: str, input_size: int = 1, output_size: int = 1, step: int = 1, batch_size: int = 1,
                      time_index: bool = True, del_column_name: bool = True,
                      preprocess_name: str = "standardization") -> (tud.DataLoader, tud.DataLoader):
    """
    Build the DataLoaders for one dataset.
    :param dataset_name: dataset name
    :param input_size: length of the input window
    :param output_size: length of the output window
    :param step: stride of the sliding window
    :param batch_size: batch size
    :param time_index: True if the first column is a timestamp, False otherwise
    :param del_column_name: use True when the first row contains column names
    :param preprocess_name: preprocessing method
    :return: training dataloader and test dataloader
    """
    ds = eval(f"preprocess.{preprocess_name}.MyDataset")(name=dataset_name.replace("/", "-"),
                                                         train_path=f"./dataset/{dataset_name}/train.csv",
                                                         test_path=f"./dataset/{dataset_name}/test.csv",
                                                         input_size=input_size, output_size=output_size,
                                                         step=step, time_index=time_index,
                                                         del_column_name=del_column_name)
    normal_dl = tud.DataLoader(dataset=ds, batch_size=batch_size, shuffle=True)
    ds.mode = "test"
    attack_dl = tud.DataLoader(dataset=ds, batch_size=batch_size, shuffle=False)
    return normal_dl, attack_dl


def create_all_dataloader(datasets: [str], input_size: int = 1, output_size: int = 1, step: int = 1,
                          batch_size: int = 1, time_index: bool = True, del_column_name: bool = True,
                          preprocess_name: str = "standardization") -> [{}]:
    """
    Build the dataloaders for all datasets.
    :param datasets: list of dataset names
    :param input_size: length of the input window
    :param output_size: length of the output window
    :param step: stride of the sliding window
    :param batch_size: batch size
    :param time_index: True if the first column is a timestamp, False otherwise
    :param del_column_name: use True when the first row contains column names
    :param preprocess_name: preprocessing method
    :return: dataloaders for all datasets
    """
    all_dataloader = []
    for dataset_name in datasets:
        logger.info(f'Building dataloader for {dataset_name}')
        if "train.csv" in os.listdir(f"./dataset/{dataset_name}"):
            normal_dl, attack_dl = create_dataloader(dataset_name=dataset_name, input_size=input_size,
                                                     output_size=output_size, step=step, batch_size=batch_size,
                                                     time_index=time_index, del_column_name=del_column_name,
                                                     preprocess_name=preprocess_name)
            all_dataloader.append([{
                'dataset_name': dataset_name,
                'normal': normal_dl,
                'attack': attack_dl
            }])
        else:
            all_sub_dataloader = []
            for sub_dataset_dir in os.listdir(f"./dataset/{dataset_name}"):
                sub_dataset_name = f"{dataset_name}/{sub_dataset_dir}"
                normal_dl, attack_dl = create_dataloader(dataset_name=sub_dataset_name, input_size=input_size,
                                                         output_size=output_size, step=step, batch_size=batch_size,
                                                         time_index=time_index, del_column_name=del_column_name,
                                                         preprocess_name=preprocess_name)
                all_sub_dataloader.append({
                    'dataset_name': sub_dataset_name.replace("/", "-"),
                    'normal': normal_dl,
                    'attack': attack_dl
                })
            all_dataloader.append(all_sub_dataloader)

        logger.info(f'Finished building dataloader for {dataset_name}')
    return all_dataloader


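# Illustrative usage sketch (not called anywhere in the tool): it assumes the layout expected by
# create_dataloader above, i.e. ./dataset/SWAT/train.csv and ./dataset/SWAT/test.csv exist.
def _example_create_dataloader():
    train_dl, test_dl = create_dataloader(dataset_name="SWAT", input_size=100, output_size=1,
                                          step=1, batch_size=32)
    x, label, y = next(iter(train_dl))
    print(x.shape, label.shape, y.shape)  # e.g. [32, 100, D], [32, 1], [32, 1, D]

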
class EvaluationScore:
    def __init__(self, evaluations: [str], attack=1):
        """
        Selects thresholds automatically and runs the evaluation methods in batch.
        :param evaluations: names of the evaluation methods to use (defined in the evaluation folder)
        :param attack: label used for anomalies, 0 or 1
        """
        self.time = 0
        self.f1 = 0
        self.f1_pa = 0
        self.f_tad = 0
        self.attack = attack
        self.normal = 1 - attack
        self._total_y_loss = None
        self._total_label = None
        self._total_pred_label = None
        self.true_pred_df = None
        self.true_pred_dict = None
        self.evaluations = evaluations
        self.scores = {}

    def add_data(self, y_loss, true_label, pred_label=None):
        """
        Add the data of one batch.
        :param y_loss: anomaly score (deviation) of the data
        :param true_label: ground-truth labels
        :param pred_label: predicted labels
        """
        if pred_label is not None:
            if self._total_label is None and self._total_pred_label is None:
                self._total_label = true_label
                self._total_pred_label = pred_label
            else:
                self._total_label = torch.cat([self._total_label, true_label], dim=0)
                self._total_pred_label = torch.cat([self._total_pred_label, pred_label], dim=0)
            return

        y_loss = y_loss.view(-1).cpu().detach().numpy()
        true_label = true_label.view(-1).cpu().detach().numpy()

        if self._total_y_loss is None and self._total_label is None:
            self._total_y_loss = y_loss
            self._total_label = true_label
            return
        self._total_y_loss = numpy.concatenate((self._total_y_loss, y_loss), axis=0)
        self._total_label = numpy.concatenate((self._total_label, true_label), axis=0)

    def best_threshold(self, true_label: list, y_loss: list) -> dict:
        ret = {}
        for func_name in self.evaluations:
            threshold_max = max(y_loss)
            threshold_min = 0
            best_threshold = 0
            for _ in range(5):
                threshold_list = [threshold_max - i * (threshold_max - threshold_min) / 10 for i in range(11)]
                f1_list = []
                for threshold_one in threshold_list:
                    prediction_loss = numpy.where(numpy.array(y_loss) > threshold_one, self.attack, self.normal)
                    f1 = eval(f"evaluation.{func_name}.evaluate")(y_true=true_label, y_pred=prediction_loss.tolist())
                    f1_list.append(f1)
                ind = f1_list.index(max(f1_list))
                best_threshold = threshold_list[ind]
                if ind == 0:
                    threshold_max = threshold_list[ind]
                    threshold_min = threshold_list[ind + 1]
                elif ind == len(threshold_list) - 1:
                    threshold_max = threshold_list[ind - 1]
                    threshold_min = threshold_list[ind]
                else:
                    threshold_max = threshold_list[ind - 1]
                    threshold_min = threshold_list[ind + 1]
            ret[func_name] = best_threshold
        return ret

    def auto_threshold(self):
        if self._total_pred_label is not None:
            return
        self._total_y_loss[numpy.isnan(self._total_y_loss)] = 0
        self._total_y_loss = self._total_y_loss / max(self._total_y_loss)
        thresholds = self.best_threshold(
            self._total_label.reshape(-1).data.tolist(), self._total_y_loss.reshape(-1).data.tolist())
        self.true_pred_dict = {
            'true': self._total_label.squeeze().tolist()
        }
        for func_name in thresholds:
            self.true_pred_dict[func_name] = \
                numpy.where(self._total_y_loss > thresholds[func_name], self.attack, self.normal).squeeze().tolist()
        # self.true_pred_df = pandas.DataFrame(self.true_pred_dict)
        for func_name in self.true_pred_dict:
            if func_name == "true":
                continue
            self.scores[func_name] = self.get_score(func_name)

    def get_score(self, func_name):
        if self._total_pred_label is not None:
            return eval(f"evaluation.{func_name}.evaluate")(self._total_label.reshape(-1).tolist(),
                                                            self._total_pred_label.reshape(-1).tolist())
        return eval(f"evaluation.{func_name}.evaluate")(self._total_label.reshape(-1).tolist(),
                                                        self.true_pred_dict[f"{func_name}"])

    def __str__(self):
        res = ""
        for func_name in self.scores:
            res += f"{func_name}={self.scores[func_name]:.3f} "
        return res[:-1]


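# Standalone sketch (not used by the tool) of the coarse-to-fine search performed in best_threshold
# above, with sklearn's f1_score standing in for the project's evaluation.<name>.evaluate functions.
def _example_threshold_search():
    from sklearn.metrics import f1_score
    scores = numpy.concatenate([numpy.random.rand(95) * 0.4, 0.6 + numpy.random.rand(5) * 0.4])
    labels = numpy.concatenate([numpy.zeros(95, dtype=int), numpy.ones(5, dtype=int)])
    t_max, t_min, best = float(scores.max()), 0.0, 0.0
    for _ in range(5):                                    # five refinement rounds
        grid = [t_max - i * (t_max - t_min) / 10 for i in range(11)]
        f1s = [f1_score(labels, (scores > t).astype(int)) for t in grid]
        ind = int(numpy.argmax(f1s))
        best = grid[ind]
        t_max = grid[max(ind - 1, 0)]                     # shrink the interval around the best grid point
        t_min = grid[min(ind + 1, len(grid) - 1)]
    return best

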
def train_model(epoch: int, optimizer: torch.optim, dataloader: tud.DataLoader, model: nn.Module,
                device: str = "cpu") -> (nn.Module, str):
    """
    Train the model for one epoch.
    :param epoch: current epoch number
    :param optimizer: optimizer
    :param dataloader: dataset
    :param model: model
    :param device: training device, cpu or gpu
    :return: the trained model; the message to log after training
    """
    model.train()
    avg_loss = []
    dataloader.dataset.mode = "train"
    start_time = time.time()
    with tqdm(total=len(dataloader), ncols=150) as _tqdm:
        _tqdm.set_description(f'(progress bar is not written to the local log) epoch: {epoch}, training')
        for data in dataloader:
            x = data[0].to(device)
            y_true = data[2].to(device)
            optimizer.zero_grad()
            loss = model.loss(x=x, y_true=y_true, epoch=epoch, device=device)
            avg_loss.append(loss)
            optimizer.step()
            _tqdm.set_postfix(loss='{:.6f}'.format(sum(avg_loss) / len(avg_loss)))
            _tqdm.update(1)
    end_time = time.time()
    info = f"epoch={epoch}, average loss={'{:.6f}'.format(sum(avg_loss) / len(avg_loss))}, " \
           f"train time={'{:.1f}'.format(end_time - start_time)}s"
    return model, info


def test_model(dataloader: tud.DataLoader, model: nn.Module, evaluations: [str], device: str = "cpu") -> \
        (EvaluationScore, str):
    """
    Test the model.
    :param dataloader: dataset
    :param model: model
    :param evaluations: evaluation methods
    :param device: device, cpu or gpu
    :return: evaluation scores; the message to log after testing
    """
    es = EvaluationScore(evaluations)
    model.eval()
    dataloader.dataset.mode = "test"
    start_time = time.time()
    with tqdm(total=len(dataloader), ncols=150) as _tqdm:
        _tqdm.set_description('(progress bar is not written to the local log) testing')
        with torch.no_grad():
            for data in dataloader:
                x = data[0].to(device)
                y_true = data[2].to(device)
                label_true = data[1].int().to(device)
                y_loss, label_pred = model.detection(x=x, y_true=y_true, device=device)
                if label_pred is not None:
                    es.add_data(y_loss=None, true_label=label_true, pred_label=label_pred)
                else:
                    es.add_data(y_loss=y_loss, true_label=label_true, pred_label=None)
                _tqdm.update(1)
    end_time = time.time()
    es.auto_threshold()
    es_score = es.__str__().replace(" ", ", ")
    info = f"{es_score}, test time={'{:.1f}'.format(end_time - start_time)}s"
    return es, info


def train_and_test_model(start_time: str, epochs: int, normal_dataloader: tud.DataLoader, attack_dataloader: tud.DataLoader,
                         model: nn.Module, evaluations: [str], device: str = "cpu", lr: float = 1e-4,
                         model_path: str = None, train: bool = True) -> (dict, dict):
    """
    Train and test.
    :param start_time: start time of the experiment, used here to locate the output directory
    :param epochs: total number of training epochs
    :param normal_dataloader: training dataloader
    :param attack_dataloader: test dataloader
    :param model: model
    :param evaluations: evaluation methods
    :param device: device
    :param lr: learning rate
    :param model_path: path to a saved model checkpoint
    :param train: whether to train; if False, only testing is performed
    :return: the best score of each evaluation method; the detection labels under each method's best score
    """
    dataset_name = normal_dataloader.dataset.name
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    if model_path:
        try:
            checkpoint = torch.load(model_path)
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            logger.info(f"Loaded model checkpoint {model_path}")
        except Exception:
            logger.warning(f"Failed to load model checkpoint {model_path}; a new model will be trained")
    logger.info(f"Model: {model.name}, dataset: {dataset_name}, device: {device}, training starts")
    best_score = {}
    best_detection = {}
    if train:
        logger.info("Mode: train and test")
        for epoch in range(1, epochs + 1):
            model, train_info = train_model(epoch=epoch, optimizer=optimizer, dataloader=normal_dataloader, model=model,
                                            device=device)
            es, test_info = test_model(dataloader=attack_dataloader, model=model, evaluations=evaluations,
                                       device=device)
            logger.info(f"{train_info}, {test_info}")
            es_score = es.__str__().replace(" ", "_")
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, f'./records/{start_time}/model/model={model.name}_dataset={dataset_name}_epoch={epoch}_{es_score}.pth')
            for func_name in es.scores:
                if func_name not in best_score or es.scores[func_name] > best_score[func_name]:
                    best_score[func_name] = es.scores[func_name]
                    best_detection[func_name] = es.true_pred_dict[func_name]
                    best_detection["true"] = es.true_pred_dict["true"]
    else:
        logger.info("Mode: test only")
        es, test_info = test_model(dataloader=attack_dataloader, model=model, evaluations=evaluations, device=device)
        logger.info(test_info)
        for func_name in es.scores:
            best_score[func_name] = es.scores[func_name]
            best_detection[func_name] = es.true_pred_dict[func_name]
            best_detection["true"] = es.true_pred_dict["true"]
    return best_score, best_detection
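

if __name__ == "__main__":
    # Hedged end-to-end sketch of how the helpers above compose (the tool's real entry point is not
    # shown here). It assumes ./dataset/SWAT exists, that method.MtadGatAtt is importable, and that
    # an evaluation module named f1 is available under ./evaluation.
    import method
    experiment_start = time.strftime("%Y-%m-%d_%H-%M-%S")
    os.makedirs(f"./records/{experiment_start}/model", exist_ok=True)
    normal_dl, attack_dl = create_dataloader(dataset_name="SWAT", input_size=100, output_size=1,
                                             step=1, batch_size=32)
    demo_model = method.MtadGatAtt.Model({"input_size": 100}, normal_dl)
    scores, detections = train_and_test_model(start_time=experiment_start, epochs=1,
                                              normal_dataloader=normal_dl, attack_dataloader=attack_dl,
                                              model=demo_model, evaluations=["f1"], device="cpu")
    print(scores)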