Initial commit of local code

zyq
2023-05-25 15:30:02 +08:00
parent 0ead9d742b
commit 2236622f33
27 changed files with 453475 additions and 0 deletions

50
config.ini Normal file
View File

@@ -0,0 +1,50 @@
[Dataset]
# Dataset name(s) (folder names); to train and test on multiple datasets, separate them with commas
# Make sure the datasets are placed under ./dataset in the required format
name=SWAT
[Method]
# Model name(s) (file names, without ".py"); to train and test multiple algorithms, separate them with commas
# Make sure the model files are placed under ./model in the required format
# Model names must consist of letters only
name=AnomalyTransformer
[Preprocess]
# Preprocessing method name (file name, without ".py")
# Make sure the preprocessing files are placed under ./preprocess in the required format
name=standardization
[Evaluation]
# Evaluation method name(s) (file names, without ".py"); to use multiple evaluation methods, separate them with commas
# Make sure the evaluation files are placed under ./evaluation in the required format
name=f1,f1pa,ftad,affiliation
[ModelPath]
# To load previously saved model parameters, use "ModelName_DatasetName" as the option name
# Paths are relative to the TSAD folder, or absolute
MtadGatAtt_SWAT=none
[BaseParameters]
# train: whether to train the model; true or false (when false, the model is only tested)
train=true
# epochs: number of training epochs; a positive integer
epochs=20
# batch_size: size of each batch; a positive integer
batch_size=32
# learning_rate: learning rate; values above 0.001 are not recommended
learning_rate=1e-4
# device: device to use; one of auto, cpu, cuda. auto picks a GPU if available, otherwise the CPU; when choosing a GPU, specify its index, e.g. cuda:0
device=auto
[CustomParameters]
# Any number of custom parameters can be added here for special inputs, e.g. external parameters required by a model file or parameters for building the dataset
# input_size: length of the input window sliced from the data
input_size=100
# output_size: length of the output
output_size=1
# step: stride of the sliding window
step=1
# Whether to skip the first row of the dataset CSV (skip it if the first row holds column names)
del_column_name=true
# Whether the first column of the dataset CSV is a timestamp
time_index=true
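For illustration, a minimal sketch of how these options can be read with configparser; it mirrors what main.py below does, and the section and option names come from this file:

import configparser

cf = configparser.ConfigParser()
cf.optionxform = str  # keep option names case-sensitive, as main.py's Config subclass does
cf.read("./config.ini", encoding="utf8")

datasets = [name.strip() for name in cf.get("Dataset", "name").split(",")]
train = cf.getboolean("BaseParameters", "train")
batch_size = cf.getint("BaseParameters", "batch_size")
learning_rate = cf.getfloat("BaseParameters", "learning_rate")
input_size = int(cf.get("CustomParameters", "input_size"))
print(datasets, train, batch_size, learning_rate, input_size)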

250
daemon.py Normal file
View File

@@ -0,0 +1,250 @@
'''
***
Modified generic daemon class
***
Author: http://www.jejik.com/articles/2007/02/
a_simple_unix_linux_daemon_in_python/www.boxedice.com
License: http://creativecommons.org/licenses/by-sa/3.0/
Changes: 23rd Jan 2009 (David Mytton <david@boxedice.com>)
- Replaced hard coded '/dev/null' in __init__ with os.devnull
- Added OS check to conditionally remove code that doesn't
work on OS X
- Added output to console on completion
- Tidied up formatting
11th Mar 2009 (David Mytton <david@boxedice.com>)
- Fixed problem with daemon exiting on Python 2.4
(before SystemExit was part of the Exception base)
13th Aug 2010 (David Mytton <david@boxedice.com>
- Fixed unhandled exception if PID file is empty
'''
# Core modules
from __future__ import print_function
import atexit
import errno
import os
import sys
import time
import signal
class Daemon(object):
"""
A generic daemon class.
Usage: subclass the Daemon class and override the run() method
"""
def __init__(self, pidfile, stdin=os.devnull,
stdout=os.devnull, stderr=os.devnull,
home_dir='.', umask=0o22, verbose=1,
use_gevent=False, use_eventlet=False):
self.stdin = stdin
self.stdout = stdout
self.stderr = stderr
self.pidfile = pidfile
self.home_dir = home_dir
self.verbose = verbose
self.umask = umask
self.daemon_alive = True
self.use_gevent = use_gevent
self.use_eventlet = use_eventlet
def log(self, *args):
if self.verbose >= 1:
print(*args)
def daemonize(self):
"""
Do the UNIX double-fork magic, see Stevens' "Advanced
Programming in the UNIX Environment" for details (ISBN 0201563177)
http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
"""
if self.use_eventlet:
import eventlet.tpool
eventlet.tpool.killall()
try:
pid = os.fork()
if pid > 0:
# Exit first parent
sys.exit(0)
except OSError as e:
sys.stderr.write(
"fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
sys.exit(1)
# Decouple from parent environment
os.chdir(self.home_dir)
os.setsid()
os.umask(self.umask)
# Do second fork
try:
pid = os.fork()
if pid > 0:
# Exit from second parent
sys.exit(0)
except OSError as e:
sys.stderr.write(
"fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
sys.exit(1)
if sys.platform != 'darwin': # This block breaks on OS X
# Redirect standard file descriptors
sys.stdout.flush()
sys.stderr.flush()
si = open(self.stdin, 'r')
so = open(self.stdout, 'a+')
if self.stderr:
try:
se = open(self.stderr, 'a+', 0)
except ValueError:
# Python 3 can't have unbuffered text I/O
se = open(self.stderr, 'a+', 1)
else:
se = so
os.dup2(si.fileno(), sys.stdin.fileno())
os.dup2(so.fileno(), sys.stdout.fileno())
os.dup2(se.fileno(), sys.stderr.fileno())
def sigtermhandler(signum, frame):
self.daemon_alive = False
sys.exit()
if self.use_gevent:
import gevent
gevent.reinit()
gevent.signal(signal.SIGTERM, sigtermhandler, signal.SIGTERM, None)
gevent.signal(signal.SIGINT, sigtermhandler, signal.SIGINT, None)
else:
signal.signal(signal.SIGTERM, sigtermhandler)
signal.signal(signal.SIGINT, sigtermhandler)
self.log("Started")
# Write pidfile
atexit.register(
self.delpid) # Make sure pid file is removed if we quit
pid = str(os.getpid())
open(self.pidfile, 'w+').write("%s\n" % pid)
def delpid(self):
try:
# the process may fork itself again
pid = int(open(self.pidfile, 'r').read().strip())
if pid == os.getpid():
os.remove(self.pidfile)
except OSError as e:
if e.errno == errno.ENOENT:
pass
else:
raise
def start(self, *args, **kwargs):
"""
Start the daemon
"""
self.log("Starting...")
# Check for a pidfile to see if the daemon already runs
try:
pf = open(self.pidfile, 'r')
pid = int(pf.read().strip())
pf.close()
except IOError:
pid = None
except SystemExit:
pid = None
if pid:
message = "pidfile %s already exists. Is it already running?\n"
sys.stderr.write(message % self.pidfile)
sys.exit(1)
# Start the daemon
self.daemonize()
self.run(*args, **kwargs)
def stop(self):
"""
Stop the daemon
"""
if self.verbose >= 1:
self.log("Stopping...")
# Get the pid from the pidfile
pid = self.get_pid()
if not pid:
message = "pidfile %s does not exist. Not running?\n"
sys.stderr.write(message % self.pidfile)
# Just to be sure. A ValueError might occur if the PID file is
# empty but does actually exist
if os.path.exists(self.pidfile):
os.remove(self.pidfile)
sys.exit(0) # Not an error in a restart
# Try killing the daemon process
try:
i = 0
while 1:
os.kill(pid, signal.SIGTERM)
time.sleep(0.1)
i = i + 1
if i % 10 == 0:
os.kill(pid, signal.SIGHUP)
except OSError as err:
if err.errno == errno.ESRCH:
if os.path.exists(self.pidfile):
os.remove(self.pidfile)
else:
print(str(err))
sys.exit(1)
self.log("Stopped")
def restart(self):
"""
Restart the daemon
"""
self.stop()
self.start()
def get_pid(self):
try:
pf = open(self.pidfile, 'r')
pid = int(pf.read().strip())
pf.close()
except IOError:
pid = None
except SystemExit:
pid = None
return pid
def is_running(self):
pid = self.get_pid()
if pid is None:
self.log('Process is stopped')
return False
elif os.path.exists('/proc/%d' % pid):
self.log('Process (pid %d) is running...' % pid)
return True
else:
self.log('Process (pid %d) is killed' % pid)
return False
def run(self):
"""
You should override this method when you subclass Daemon.
It will be called after the process has been
daemonized by start() or restart().
"""
raise NotImplementedError
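A minimal usage sketch for this class (the pidfile path and the loop body are illustrative only): subclass Daemon, override run(), and control the process from separate invocations.

from daemon import Daemon
import time

class MyDaemon(Daemon):
    def run(self):
        # placeholder work loop; replace with real logic
        while self.daemon_alive:
            time.sleep(1)

if __name__ == "__main__":
    d = MyDaemon("/tmp/mydaemon.pid")  # hypothetical pidfile path
    d.start()  # daemonize, write the pidfile and call run()
    # a later invocation would call d.stop() to terminate the daemon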

4
evaluation/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from .ftad import evaluate
from .f1pa import evaluate
from .affiliation import evaluate
from .f1 import evaluate

23
evaluation/affiliation.py Normal file
View File

@@ -0,0 +1,23 @@
from .affiliation_bin.generics import convert_vector_to_events
from .affiliation_bin.metrics import pr_from_events
def evaluate(y_true: list, y_pred: list) -> float:
"""
Affiliation-based evaluation: convert the binary label vectors to events and compute the affiliation F1
:param y_true: ground truth labels
:param y_pred: detected labels
:return: affiliation F1 score
"""
true, pred = y_true.copy(), y_pred.copy()
events_pred = convert_vector_to_events(pred)
events_gt = convert_vector_to_events(true)
Trange = (0, len(pred))
res = pr_from_events(events_pred, events_gt, Trange)
aff_precision = res["precision"]
aff_recall = res["recall"]
if aff_recall == 0 or aff_precision == 0:
return 0
aff_f1 = 2 * aff_precision * aff_recall / (aff_precision + aff_recall)
return aff_f1
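For illustration, a hedged usage sketch with made-up label vectors (assuming this module is imported as evaluation.affiliation):

from evaluation.affiliation import evaluate

y_true = [0, 0, 1, 1, 1, 0, 0, 0, 1, 1]
y_pred = [0, 0, 0, 1, 1, 1, 0, 0, 1, 0]
print(evaluate(y_true, y_pred))  # affiliation F1 in [0, 1]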

View File

@@ -0,0 +1 @@
from .metrics import pr_from_events

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ._integral_interval import interval_intersection
def t_start(j, Js=[(1, 2), (3, 4), (5, 6)], Trange=(1, 10)):
"""
Helper for `E_gt_func`
:param j: index from 0 to len(Js) (included) on which to get the start
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included
:return: generalized start such that the middle of t_start and t_stop
always gives the affiliation_bin zone
"""
b = max(Trange)
n = len(Js)
if j == n:
return (2 * b - t_stop(n - 1, Js, Trange))
else:
return (Js[j][0])
def t_stop(j, Js=[(1, 2), (3, 4), (5, 6)], Trange=(1, 10)):
"""
Helper for `E_gt_func`
:param j: index from 0 to len(Js) (included) on which to get the stop
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included
:return: generalized stop such that the middle of t_start and t_stop
always gives the affiliation_bin zone
"""
if j == -1:
a = min(Trange)
return (2 * a - t_start(0, Js, Trange))
else:
return (Js[j][1])
def E_gt_func(j, Js, Trange):
"""
Get the affiliation_bin zone of element j of the ground truth
:param j: index from 0 to len(Js) (excluded) on which to get the zone
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included, can
be (-math.inf, math.inf) for distance measures
:return: affiliation_bin zone of element j of the ground truth represented
as a couple
"""
range_left = (t_stop(j - 1, Js, Trange) + t_start(j, Js, Trange)) / 2
range_right = (t_stop(j, Js, Trange) + t_start(j + 1, Js, Trange)) / 2
return ((range_left, range_right))
def get_all_E_gt_func(Js, Trange):
"""
Get the affiliation_bin partition from the ground truth point of view
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included, can
be (-math.inf, math.inf) for distance measures
:return: affiliation_bin partition of the events
"""
# E_gt is the limit of affiliation_bin/attraction for each ground truth event
E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
return (E_gt)
def affiliation_partition(Is=[(1, 1.5), (2, 5), (5, 6), (8, 9)], E_gt=[(1, 2.5), (2.5, 4.5), (4.5, 10)]):
"""
Cut the events into the affiliation_bin zones
The presentation given here is from the ground truth point of view,
but it is also used in the reversed direction in the main function.
:param Is: events as a list of couples
:param E_gt: range of the affiliation_bin zones
:return: a list of list of intervals (each interval represented by either
a couple or None for empty interval). The outer list is indexed by each
affiliation_bin zone of `E_gt`. The inner list is indexed by the events of `Is`.
"""
out = [None] * len(E_gt)
for j in range(len(E_gt)):
E_gt_j = E_gt[j]
discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E
discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E
kept_index = [not (a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)]
Is_j = [x for x, y in zip(Is, kept_index) if y]  # keep only the events overlapping this zone
out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j]
return (out)
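A small worked sketch of the two helpers above (toy numbers; assumes both functions are in scope):

Js = [(3, 5), (8, 10)]            # ground truth events
Trange = (0, 20)
E_gt = get_all_E_gt_func(Js, Trange)
print(E_gt)                       # [(0.0, 6.5), (6.5, 20.0)]: one affiliation zone per ground truth event
Is = [(1, 2), (4, 9)]             # predicted events
print(affiliation_partition(Is, E_gt))  # predicted events cut into the two zones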

View File

@@ -0,0 +1,493 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import math
from .generics import _sum_wo_nan
"""
In order to shorten the length of the variables,
the general convention in this file is to let:
- I for a predicted event (start, stop),
- Is for a list of predicted events,
- J for a ground truth event,
- Js for a list of ground truth events.
"""
def interval_length(J=(1, 2)):
"""
Length of an interval
:param J: couple representating the start and stop of an interval, or None
:return: length of the interval, and 0 for a None interval
"""
if J is None:
return (0)
return (J[1] - J[0])
def sum_interval_lengths(Is=[(1, 2), (3, 4), (5, 6)]):
"""
Sum of length of the intervals
:param Is: list of intervals represented by starts and stops
:return: sum of the interval length
"""
return (sum([interval_length(I) for I in Is]))
def interval_intersection(I=(1, 3), J=(2, 4)):
"""
Intersection between two intervals I and J
I and J should be either empty or represent a positive interval (no point)
:param I: an interval represented by start and stop
:param J: a second interval of the same form
:return: an interval representing the start and stop of the intersection (or None if empty)
"""
if I is None:
return (None)
if J is None:
return (None)
I_inter_J = (max(I[0], J[0]), min(I[1], J[1]))
if I_inter_J[0] >= I_inter_J[1]:
return (None)
else:
return (I_inter_J)
def interval_subset(I=(1, 3), J=(0, 6)):
"""
Checks whether I is a subset of J
:param I: an non empty interval represented by start and stop
:param J: a second non empty interval of the same form
:return: True if I is a subset of J
"""
if (I[0] >= J[0]) and (I[1] <= J[1]):
return True
else:
return False
def cut_into_three_func(I, J):
"""
Cut an interval I into a partition of 3 subsets:
the elements before J,
the elements belonging to J,
and the elements after J
:param I: an interval represented by start and stop, or None for an empty one
:param J: a non empty interval
:return: a triplet of three intervals, each represented by either (start, stop) or None
"""
if I is None:
return ((None, None, None))
I_inter_J = interval_intersection(I, J)
if I == I_inter_J:
I_before = None
I_after = None
elif I[1] <= J[0]:
I_before = I
I_after = None
elif I[0] >= J[1]:
I_before = None
I_after = I
elif (I[0] <= J[0]) and (I[1] >= J[1]):
I_before = (I[0], I_inter_J[0])
I_after = (I_inter_J[1], I[1])
elif I[0] <= J[0]:
I_before = (I[0], I_inter_J[0])
I_after = None
elif I[1] >= J[1]:
I_before = None
I_after = (I_inter_J[1], I[1])
else:
raise ValueError('unexpected unconsidered case')
return (I_before, I_inter_J, I_after)
def get_pivot_j(I, J):
"""
Get the single point of J that is the closest to I, called 'pivot' here,
with the requirement that I should be outside J
:param I: a non empty interval (start, stop)
:param J: another non empty interval, with empty intersection with I
:return: the element j of J that is the closest to I
"""
if interval_intersection(I, J) is not None:
raise ValueError('I and J should have a void intersection')
j_pivot = None # j_pivot is a border of J
if max(I) <= min(J):
j_pivot = min(J)
elif min(I) >= max(J):
j_pivot = max(J)
else:
raise ValueError('I should be outside J')
return (j_pivot)
def integral_mini_interval(I, J):
"""
In the specific case where interval I is located outside J,
integral of distance from x to J over the interval x \in I.
This is the *integral* i.e. the sum.
It's not the mean (not divided by the length of I yet)
:param I: a interval (start, stop), or None
:param J: a non empty interval, with empty intersection with I
:return: the integral of distances d(x, J) over x \in I
"""
if I is None:
return (0)
j_pivot = get_pivot_j(I, J)
a = min(I)
b = max(I)
return ((b - a) * abs((j_pivot - (a + b) / 2)))
def integral_interval_distance(I, J):
"""
For any non empty intervals I, J, compute the
integral of distance from x to J over the interval x \in I.
This is the *integral* i.e. the sum.
It's not the mean (not divided by the length of I yet)
The interval I can intersect J or not
:param I: a interval (start, stop), or None
:param J: a non empty interval
:return: the integral of distances d(x, J) over x \in I
"""
# I and J are single intervals (not generic sets)
# I is a predicted interval in the range of affiliation_bin of J
def f(I_cut):
return (integral_mini_interval(I_cut, J))
# If I_middle is fully included into J, it is
# the distance to J is always 0
def f0(I_middle):
return (0)
cut_into_three = cut_into_three_func(I, J)
# Distance for now, not the mean:
# Distance left: Between cut_into_three[0] and the point min(J)
d_left = f(cut_into_three[0])
# Distance middle: Between cut_into_three[1] = I inter J, and J
d_middle = f0(cut_into_three[1])
# Distance right: Between cut_into_three[2] and the point max(J)
d_right = f(cut_into_three[2])
# It's an integral so summable
return (d_left + d_middle + d_right)
def integral_mini_interval_P_CDFmethod__min_piece(I, J, E):
"""
Helper of `integral_mini_interval_Pprecision_CDFmethod`
In the specific case where interval I is located outside J,
compute the integral $\int_{d_min}^{d_max} \min(m, x) dx$, with:
- m the smallest distance from J to E,
- d_min the smallest distance d(x, J) from x \in I to J
- d_max the largest distance d(x, J) from x \in I to J
:param I: a single predicted interval, a non empty interval (start, stop)
:param J: ground truth interval, a non empty interval, with empty intersection with I
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
:return: the integral $\int_{d_min}^{d_max} \min(m, x) dx$
"""
if interval_intersection(I, J) is not None:
raise ValueError('I and J should have a void intersection')
if not interval_subset(J, E):
raise ValueError('J should be included in E')
if not interval_subset(I, E):
raise ValueError('I should be included in E')
e_min = min(E)
j_min = min(J)
j_max = max(J)
e_max = max(E)
i_min = min(I)
i_max = max(I)
d_min = max(i_min - j_max, j_min - i_max)
d_max = max(i_max - j_max, j_min - i_min)
m = min(j_min - e_min, e_max - j_max)
A = min(d_max, m) ** 2 - min(d_min, m) ** 2
B = max(d_max, m) - max(d_min, m)
C = (1 / 2) * A + m * B
return (C)
def integral_mini_interval_Pprecision_CDFmethod(I, J, E):
"""
Integral of the probability of distances over the interval I.
In the specific case where interval I is located outside J,
compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single predicted interval, a non empty interval (start, stop)
:param J: ground truth interval, a non empty interval, with empty intersection with I
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
:return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
"""
integral_min_piece = integral_mini_interval_P_CDFmethod__min_piece(I, J, E)
e_min = min(E)
j_min = min(J)
j_max = max(J)
e_max = max(E)
i_min = min(I)
i_max = max(I)
d_min = max(i_min - j_max, j_min - i_max)
d_max = max(i_max - j_max, j_min - i_min)
integral_linear_piece = (1 / 2) * (d_max ** 2 - d_min ** 2)
integral_remaining_piece = (j_max - j_min) * (i_max - i_min)
DeltaI = i_max - i_min
DeltaE = e_max - e_min
output = DeltaI - (1 / DeltaE) * (integral_min_piece + integral_linear_piece + integral_remaining_piece)
return (output)
def integral_interval_probaCDF_precision(I, J, E):
"""
Integral of the probability of distances over the interval I.
Compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single (non empty) predicted interval in the zone of affiliation_bin of J
:param J: ground truth interval
:param E: affiliation_bin/influence zone for J
:return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
"""
# I and J are single intervals (not generic sets)
def f(I_cut):
if I_cut is None:
return (0)
else:
return (integral_mini_interval_Pprecision_CDFmethod(I_cut, J, E))
# If I_middle is fully included into J, it is
# integral of 1 on the interval I_middle, so it's |I_middle|
def f0(I_middle):
if I_middle is None:
return (0)
else:
return (max(I_middle) - min(I_middle))
cut_into_three = cut_into_three_func(I, J)
# Distance for now, not the mean:
# Distance left: Between cut_into_three[0] and the point min(J)
d_left = f(cut_into_three[0])
# Distance middle: Between cut_into_three[1] = I inter J, and J
d_middle = f0(cut_into_three[1])
# Distance right: Between cut_into_three[2] and the point max(J)
d_right = f(cut_into_three[2])
# It's an integral so summable
return (d_left + d_middle + d_right)
def cut_J_based_on_mean_func(J, e_mean):
"""
Helper function for the recall.
Partition J into two intervals: before and after e_mean
(e_mean represents the center element of E the zone of affiliation_bin)
:param J: ground truth interval
:param e_mean: a float number (center value of E)
:return: a couple partitionning J into (J_before, J_after)
"""
if J is None:
J_before = None
J_after = None
elif e_mean >= max(J):
J_before = J
J_after = None
elif e_mean <= min(J):
J_before = None
J_after = J
else: # e_mean is across J
J_before = (min(J), e_mean)
J_after = (e_mean, max(J))
return ((J_before, J_after))
def integral_mini_interval_Precall_CDFmethod(I, J, E):
"""
Integral of the probability of distances over the interval J.
In the specific case where interval J is located outside I,
compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single (non empty) predicted interval
:param J: ground truth (non empty) interval, with empty intersection with I
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
:return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
"""
# The interval J should be located outside I
# (so it's either the left piece or the right piece w.r.t I)
i_pivot = get_pivot_j(J, I)
e_min = min(E)
e_max = max(E)
e_mean = (e_min + e_max) / 2
# If i_pivot is outside E (it's possible), then
# the distance is worst that any random element within E,
# so we set the recall to 0
if i_pivot <= min(E):
return (0)
elif i_pivot >= max(E):
return (0)
# Otherwise, we have at least i_pivot in E and so d < M so min(d,M)=d
cut_J_based_on_e_mean = cut_J_based_on_mean_func(J, e_mean)
J_before = cut_J_based_on_e_mean[0]
J_after = cut_J_based_on_e_mean[1]
iemin_mean = (e_min + i_pivot) / 2
cut_Jbefore_based_on_iemin_mean = cut_J_based_on_mean_func(J_before, iemin_mean)
J_before_closeE = cut_Jbefore_based_on_iemin_mean[
0] # before e_mean and closer to e_min than i_pivot ~ J_before_before
J_before_closeI = cut_Jbefore_based_on_iemin_mean[
1] # before e_mean and closer to i_pivot than e_min ~ J_before_after
iemax_mean = (e_max + i_pivot) / 2
cut_Jafter_based_on_iemax_mean = cut_J_based_on_mean_func(J_after, iemax_mean)
J_after_closeI = cut_Jafter_based_on_iemax_mean[0] # after e_mean and closer to i_pivot than e_max ~ J_after_before
J_after_closeE = cut_Jafter_based_on_iemax_mean[1] # after e_mean and closer to e_max than i_pivot ~ J_after_after
if J_before_closeE is not None:
j_before_before_min = min(J_before_closeE) # == min(J)
j_before_before_max = max(J_before_closeE)
else:
j_before_before_min = math.nan
j_before_before_max = math.nan
if J_before_closeI is not None:
j_before_after_min = min(J_before_closeI) # == j_before_before_max if existing
j_before_after_max = max(J_before_closeI) # == max(J_before)
else:
j_before_after_min = math.nan
j_before_after_max = math.nan
if J_after_closeI is not None:
j_after_before_min = min(J_after_closeI) # == min(J_after)
j_after_before_max = max(J_after_closeI)
else:
j_after_before_min = math.nan
j_after_before_max = math.nan
if J_after_closeE is not None:
j_after_after_min = min(J_after_closeE) # == j_after_before_max if existing
j_after_after_max = max(J_after_closeE) # == max(J)
else:
j_after_after_min = math.nan
j_after_after_max = math.nan
# <-- J_before_closeE --> <-- J_before_closeI --> <-- J_after_closeI --> <-- J_after_closeE -->
# j_bb_min j_bb_max j_ba_min j_ba_max j_ab_min j_ab_max j_aa_min j_aa_max
# (with `b` for before and `a` for after in the previous variable names)
# vs e_mean m = min(t-e_min, e_max-t) d=|i_pivot-t| min(d,m) \int min(d,m)dt \int d dt \int_(min(d,m)+d)dt \int_{t \in J}(min(d,m)+d)dt
# Case J_before_closeE & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = t-e_min t^2/2-e_min*t i_pivot*t-t^2/2 t^2/2-e_min*t+i_pivot*t-t^2/2 = (i_pivot-e_min)*t (i_pivot-e_min)*tB - (i_pivot-e_min)*tA = (i_pivot-e_min)*(tB-tA)
# Case J_before_closeI & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
# Case J_after_closeI & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
# Case J_after_closeE & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = e_max-t e_max*t-t^2/2 i_pivot*t-t^2/2 e_max*t-t^2/2+i_pivot*t-t^2/2 = (e_max+i_pivot)*t-t^2 (e_max+i_pivot)*tB-tB^2 - (e_max+i_pivot)*tA + tA^2 = (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2)
#
# Case J_before_closeE & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-e_min t^2/2-e_min*t t^2/2-i_pivot*t t^2/2-e_min*t+t^2/2-i_pivot*t = t^2-(e_min+i_pivot)*t tB^2-(e_min+i_pivot)*tB - tA^2 + (e_min+i_pivot)*tA = (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA)
# Case J_before_closeI & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
# Case J_after_closeI & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
# Case J_after_closeE & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = e_max-t e_max*t-t^2/2 t^2/2-i_pivot*t e_max*t-t^2/2+t^2/2-i_pivot*t = (e_max-i_pivot)*t (e_max-i_pivot)*tB - (e_max-i_pivot)*tA = (e_max-i_pivot)*(tB-tA)
if i_pivot >= max(J):
part1_before_closeE = (i_pivot - e_min) * (
j_before_before_max - j_before_before_min) # (i_pivot-e_min)*(tB-tA) # j_before_before_max - j_before_before_min
part2_before_closeI = 2 * i_pivot * (j_before_after_max - j_before_after_min) - (
j_before_after_max ** 2 - j_before_after_min ** 2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_before_after_max - j_before_after_min
part3_after_closeI = 2 * i_pivot * (j_after_before_max - j_after_before_min) - (
j_after_before_max ** 2 - j_after_before_min ** 2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_after_before_max - j_after_before_min
part4_after_closeE = (e_max + i_pivot) * (j_after_after_max - j_after_after_min) - (
j_after_after_max ** 2 - j_after_after_min ** 2) # (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # j_after_after_max - j_after_after_min
out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
elif i_pivot <= min(J):
part1_before_closeE = (j_before_before_max ** 2 - j_before_before_min ** 2) - (e_min + i_pivot) * (
j_before_before_max - j_before_before_min) # (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # j_before_before_max - j_before_before_min
part2_before_closeI = (j_before_after_max ** 2 - j_before_after_min ** 2) - 2 * i_pivot * (
j_before_after_max - j_before_after_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_before_after_max - j_before_after_min
part3_after_closeI = (j_after_before_max ** 2 - j_after_before_min ** 2) - 2 * i_pivot * (
j_after_before_max - j_after_before_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_after_before_max - j_after_before_min
part4_after_closeE = (e_max - i_pivot) * (
j_after_after_max - j_after_after_min) # (e_max-i_pivot)*(tB-tA) # j_after_after_max - j_after_after_min
out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
else:
raise ValueError('The i_pivot should be outside J')
out_integral_min_dm_plus_d = _sum_wo_nan(out_parts) # integral on all J, i.e. sum of the disjoint parts
# We have for each point t of J:
# \bar{F}_{t, recall}(d) = 1 - (1/|E|) * (min(d,m) + d)
# Since t is a single-point here, and we are in the case where i_pivot is inside E.
# The integral is then given by:
# C = \int_{t \in J} \bar{F}_{t, recall}(D(t)) dt
# = \int_{t \in J} 1 - (1/|E|) * (min(d,m) + d) dt
# = |J| - (1/|E|) * [\int_{t \in J} (min(d,m) + d) dt]
# = |J| - (1/|E|) * out_integral_min_dm_plus_d
DeltaJ = max(J) - min(J)
DeltaE = max(E) - min(E)
C = DeltaJ - (1 / DeltaE) * out_integral_min_dm_plus_d
return (C)
def integral_interval_probaCDF_recall(I, J, E):
"""
Integral of the probability of distances over the interval J.
Compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single (non empty) predicted interval
:param J: ground truth (non empty) interval
:param E: the affiliation_bin/influence zone for J
:return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
"""
# I and J are single intervals (not generic sets)
# E is the outside affiliation_bin interval of J (even for recall!)
# (in particular J \subset E)
#
# J is the portion of the ground truth affiliated to I
# I is a predicted interval (can be outside E possibly since it's recall)
def f(J_cut):
if J_cut is None:
return (0)
else:
return integral_mini_interval_Precall_CDFmethod(I, J_cut, E)
# If J_middle is fully included into I, it is
# integral of 1 on the interval J_middle, so it's |J_middle|
def f0(J_middle):
if J_middle is None:
return (0)
else:
return (max(J_middle) - min(J_middle))
cut_into_three = cut_into_three_func(J, I) # it's J that we cut into 3, depending on the position w.r.t I
# since we integrate over J this time.
#
# Distance for now, not the mean:
# Distance left: Between cut_into_three[0] and the point min(I)
d_left = f(cut_into_three[0])
# Distance middle: Between cut_into_three[1] = J inter I, and I
d_middle = f0(cut_into_three[1])
# Distance right: Between cut_into_three[2] and the point max(I)
d_right = f(cut_into_three[2])
# It's an integral so summable
return (d_left + d_middle + d_right)
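A short sketch of the basic interval helpers defined above (toy values; assumes the functions are in scope):

print(interval_intersection((1, 3), (2, 4)))          # (2, 3)
print(interval_intersection((1, 2), (3, 4)))          # None: disjoint intervals
print(cut_into_three_func((0, 10), (3, 5)))           # ((0, 3), (3, 5), (5, 10))
print(integral_interval_distance((1, 2), (2, 5.5)))   # 0.5 for this configuration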

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import math
from ._affiliation_zone import (
get_all_E_gt_func,
affiliation_partition)
from ._integral_interval import (
integral_interval_distance,
integral_interval_probaCDF_precision,
integral_interval_probaCDF_recall,
interval_length,
sum_interval_lengths)
def affiliation_precision_distance(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5)):
"""
Compute the individual average distance from Is to a single ground truth J
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:return: individual average precision directed distance number
"""
if all([I is None for I in Is]): # no prediction in the current area
return (math.nan) # undefined
return (sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is))
def affiliation_precision_proba(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5), E=(0, 8)):
"""
Compute the individual precision probability from Is to a single ground truth J
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:param E: couple representing the start and stop of the zone of affiliation_bin of J
:return: individual precision probability in [0, 1], or math.nan if undefined
"""
if all([I is None for I in Is]): # no prediction in the current area
return (math.nan) # undefined
return (sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is))
def affiliation_recall_distance(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5)):
"""
Compute the individual average distance from a single J to the predictions Is
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:return: individual average recall directed distance number
"""
Is = [I for I in Is if I is not None] # filter possible None in Is
if len(Is) == 0: # there is no prediction in the current area
return (math.inf)
E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions
Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is
return (sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J))
def affiliation_recall_proba(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5), E=(0, 8)):
"""
Compute the individual recall probability from a single ground truth J to Is
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:param E: couple representing the start and stop of the zone of affiliation_bin of J
:return: individual recall probability in [0, 1]
"""
Is = [I for I in Is if I is not None] # filter possible None in Is
if len(Is) == 0: # there is no prediction in the current area
return (0)
E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions
Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is
return (sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J))
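As a quick sanity check (toy events; assumes the functions above are in scope):

Is = [(1, 2), (3, 4)]
J = (2, 5.5)
print(affiliation_precision_distance(Is, J))   # should be 0.25 for these inputs
print(affiliation_recall_distance(Is, J))      # average recall-directed distance from J to the predictions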

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from itertools import groupby
from operator import itemgetter
import math
import gzip
import glob
import os
def convert_vector_to_events(vector=[0, 1, 1, 0, 0, 1, 0]):
"""
Convert a binary vector (indicating 1 for the anomalous instances)
to a list of events. The events are considered as durations,
i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).
:param vector: a list of elements belonging to {0, 1}
:return: a list of couples, each couple representing the start and stop of
each event
"""
positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
events = []
for k, g in groupby(enumerate(positive_indexes), lambda ix: ix[0] - ix[1]):
cur_cut = list(map(itemgetter(1), g))
events.append((cur_cut[0], cur_cut[-1]))
# Consistent conversion in case of range anomalies (for indexes):
# A positive index i is considered as the interval [i, i+1),
# so the last index should be moved by 1
events = [(x, y + 1) for (x, y) in events]
return (events)
def infer_Trange(events_pred, events_gt):
"""
Given the list of events events_pred and events_gt, get the
smallest possible Trange corresponding to the start and stop indexes
of the whole series.
Trange will not influence the measure of distances, but will impact the
measures of probabilities.
:param events_pred: a list of couples corresponding to predicted events
:param events_gt: a list of couples corresponding to ground truth events
:return: a couple corresponding to the smallest range containing the events
"""
if len(events_gt) == 0:
raise ValueError('The gt events should contain at least one event')
if len(events_pred) == 0:
# empty prediction, base Trange only on events_gt (which is non empty)
return (infer_Trange(events_gt, events_gt))
min_pred = min([x[0] for x in events_pred])
min_gt = min([x[0] for x in events_gt])
max_pred = max([x[1] for x in events_pred])
max_gt = max([x[1] for x in events_gt])
Trange = (min(min_pred, min_gt), max(max_pred, max_gt))
return (Trange)
def has_point_anomalies(events):
"""
Checking whether events contain point anomalies, i.e.
events starting and stopping at the same time.
:param events: a list of couples corresponding to predicted events
:return: True if the events have any point anomalies, False otherwise
"""
if len(events) == 0:
return (False)
return (min([x[1] - x[0] for x in events]) == 0)
def _sum_wo_nan(vec):
"""
Sum of elements, ignoring math.isnan ones
:param vec: vector of floating numbers
:return: sum of the elements, ignoring math.isnan ones
"""
vec_wo_nan = [e for e in vec if not math.isnan(e)]
return (sum(vec_wo_nan))
def _len_wo_nan(vec):
"""
Count of elements, ignoring math.isnan ones
:param vec: vector of floating numbers
:return: count of the elements, ignoring math.isnan ones
"""
vec_wo_nan = [e for e in vec if not math.isnan(e)]
return (len(vec_wo_nan))
def read_gz_data(filename='data/machinetemp_groundtruth.gz'):
"""
Load a file compressed with gz, such that each line of the
file is either 0 (representing a normal instance) or 1 (representing
an anomalous instance).
:param filename: file path to the gz compressed file
:return: list of integers with either 0 or 1
"""
with gzip.open(filename, 'rb') as f:
content = f.read().splitlines()
content = [int(x) for x in content]
return (content)
def read_all_as_events():
"""
Load the files contained in the folder `data/` and convert
to events. The length of the series is kept.
The convention for the file name is: `dataset_algorithm.gz`
:return: two dictionaries:
- the first containing the list of events for each dataset and algorithm,
- the second containing the range of the series for each dataset
"""
filepaths = glob.glob('data/*.gz')
datasets = dict()
Tranges = dict()
for filepath in filepaths:
vector = read_gz_data(filepath)
events = convert_vector_to_events(vector)
# ad hoc cut for those files
cut_filepath = (os.path.split(filepath)[1]).split('_')
data_name = cut_filepath[0]
algo_name = (cut_filepath[1]).split('.')[0]
if not data_name in datasets:
datasets[data_name] = dict()
Tranges[data_name] = (0, len(vector))
datasets[data_name][algo_name] = events
return (datasets, Tranges)
def f1_func(p, r):
"""
Compute the f1 function
:param p: precision numeric value
:param r: recall numeric value
:return: f1 numeric value
"""
return (2 * p * r / (p + r))
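A quick sketch of the vector-to-events conversion used throughout this package (import path as suggested by the imports in evaluation/affiliation.py):

from evaluation.affiliation_bin.generics import (
    convert_vector_to_events, infer_Trange, has_point_anomalies)

print(convert_vector_to_events([0, 1, 1, 0, 0, 1, 0]))  # [(1, 3), (5, 6)]
print(infer_Trange([(1, 3)], [(2, 6)]))                 # (1, 6)
print(has_point_anomalies([(1, 3), (4, 4)]))            # True: (4, 4) is a point anomaly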

View File

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from .generics import (
infer_Trange,
has_point_anomalies,
_len_wo_nan,
_sum_wo_nan,
read_all_as_events)
from ._affiliation_zone import (
get_all_E_gt_func,
affiliation_partition)
from ._single_ground_truth_event import (
affiliation_precision_distance,
affiliation_recall_distance,
affiliation_precision_proba,
affiliation_recall_proba)
def test_events(events):
"""
Verify the validity of the input events
:param events: list of events, each represented by a couple (start, stop)
:return: None. Raise an error for incorrect formed or non ordered events
"""
if type(events) is not list:
raise TypeError('Input `events` should be a list of couples')
if not all([type(x) is tuple for x in events]):
raise TypeError('Input `events` should be a list of tuples')
if not all([len(x) == 2 for x in events]):
raise ValueError('Input `events` should be a list of couples (start, stop)')
if not all([x[0] <= x[1] for x in events]):
raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop')
if not all([events[i][1] < events[i + 1][0] for i in range(len(events) - 1)]):
raise ValueError('Couples of input `events` should be disjoint and ordered')
def pr_from_events(events_pred, events_gt, Trange):
"""
Compute the affiliation_bin metrics including the precision/recall in [0,1],
along with the individual precision/recall distances and probabilities
:param events_pred: list of predicted events, each represented by a couple
indicating the start and the stop of the event
:param events_gt: list of ground truth events, each represented by a couple
indicating the start and the stop of the event
:param Trange: range of the series where events_pred and events_gt are included,
represented as a couple (start, stop)
:return: dictionary with precision, recall, and the individual metrics
"""
# testing the inputs
test_events(events_pred)
test_events(events_gt)
# other tests
minimal_Trange = infer_Trange(events_pred, events_gt)
if not Trange[0] <= minimal_Trange[0]:
raise ValueError('`Trange` should include all the events')
if not minimal_Trange[1] <= Trange[1]:
raise ValueError('`Trange` should include all the events')
if len(events_gt) == 0:
raise ValueError('Input `events_gt` should have at least one event')
if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
raise ValueError('Cannot manage point anomalies currently')
if Trange is None:
# Set as default, but Trange should be indicated if probabilities are used
raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function)')
E_gt = get_all_E_gt_func(events_gt, Trange)
aff_partition = affiliation_partition(events_pred, E_gt)
# Computing precision distance
d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
# Computing recall distance
d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
# Computing precision
p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
# Computing recall
p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
if _len_wo_nan(p_precision) > 0:
p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision)
else:
p_precision_average = p_precision[0] # math.nan
p_recall_average = sum(p_recall) / len(p_recall)
dict_out = dict({'precision': p_precision_average,
'recall': p_recall_average,
'individual_precision_probabilities': p_precision,
'individual_recall_probabilities': p_recall,
'individual_precision_distances': d_precision,
'individual_recall_distances': d_recall})
return (dict_out)
def produce_all_results():
"""
Produce the affiliation_bin precision/recall for all files
contained in the `data` repository
:return: a dictionary indexed by data names, each containing a dictionary
indexed by algorithm names, each containing the results of the affiliation_bin
metrics (precision, recall, individual probabilities and distances)
"""
datasets, Tranges = read_all_as_events() # read all the events in folder `data`
results = dict()
for data_name in datasets.keys():
results_data = dict()
for algo_name in datasets[data_name].keys():
if algo_name != 'groundtruth':
results_data[algo_name] = pr_from_events(datasets[data_name][algo_name],
datasets[data_name]['groundtruth'],
Tranges[data_name])
results[data_name] = results_data
return (results)
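For illustration, a hedged sketch of calling pr_from_events directly with toy events (import path as suggested by evaluation/affiliation.py):

from evaluation.affiliation_bin.metrics import pr_from_events

events_pred = [(3, 6), (8, 9)]
events_gt = [(2, 5), (8, 10)]
res = pr_from_events(events_pred, events_gt, Trange=(0, 12))
print(res["precision"], res["recall"])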

12
evaluation/f1.py Normal file
View File

@@ -0,0 +1,12 @@
from sklearn.metrics import f1_score, precision_score, recall_score
def evaluate(y_true: list, y_pred: list) -> float:
"""
F1 evaluation
:param y_true: ground truth labels
:param y_pred: detected labels
:return: f1 score
"""
f1 = f1_score(y_true, y_pred)
return f1
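A quick sketch with toy labels:

from evaluation.f1 import evaluate

print(evaluate([0, 1, 1, 0], [0, 1, 0, 0]))  # ≈ 0.667: one TP, one FN, no FP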

66
evaluation/f1pa.py Normal file
View File

@@ -0,0 +1,66 @@
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
def adjust_predicts(score, label,
threshold=None,
pred=None,
calc_latency=False):
"""
This function is taken from the official point-adjust implementation.
Calculate adjusted predict labels using given `score`, `threshold` (or given `pred`) and `label`.
Args:
score =: The anomaly score
label : The ground-truth label
threshold (float): The threshold of anomaly score.
A point is labeled as "anomaly" if its score is lower than the threshold.
pred : if not None, adjust `pred` and ignore `score` and `threshold`,
calc_latency (bool):
Returns:
np.ndarray: predict labels
"""
if len(score) != len(label):
raise ValueError("score and label must have the same length")
score = np.asarray(score)
label = np.asarray(label)
latency = 0
if pred is None:
predict = score < threshold
else:
predict = pred
actual = label > 0.1
anomaly_state = False
anomaly_count = 0
for i in range(len(score)):
if actual[i] and predict[i] and not anomaly_state:
anomaly_state = True
anomaly_count += 1
for j in range(i, 0, -1):
if not actual[j]:
break
else:
if not predict[j]:
predict[j] = True
latency += 1
elif not actual[i]:
anomaly_state = False
if anomaly_state:
predict[i] = True
if calc_latency:
return predict, latency / (anomaly_count + 1e-4)
else:
return predict
def evaluate(y_true: list, y_pred: list) -> float:
"""
F1-PA evaluation: adjust the labels with point adjust, then compute F1
:param y_true: ground truth labels
:param y_pred: detected labels
:return: f1 score after point adjustment
"""
y_true, y_pred = y_true.copy(), y_pred.copy()
adjust_y_pred = adjust_predicts(score=np.array([0] * len(y_true)), label=y_true, pred=y_pred)
f1 = f1_score(y_true, adjust_y_pred)
return f1
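A small sketch showing the effect of point adjustment (made-up labels): flagging a single point of an anomalous segment is enough for the whole segment to count as detected.

from evaluation.f1pa import evaluate

y_true = [0, 1, 1, 1, 0]
y_pred = [0, 0, 1, 0, 0]   # only one point of the anomalous segment is flagged
print(evaluate(y_true, y_pred))  # 1.0 after point adjust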

156
evaluation/ftad.py Normal file
View File

@@ -0,0 +1,156 @@
import math
import numpy as np
def evaluate(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
"""
Compute the F score based on anomalous segments
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: segment F score
"""
p_tad = precision_tad(y_true=y_true, y_pred=y_pred, pos_label=pos_label, max_segment=max_segment)
r_tad = recall_tad(y_true=y_true, y_pred=y_pred, pos_label=pos_label, max_segment=max_segment)
score = 0
if p_tad and r_tad:
score = 2 * p_tad * r_tad / (p_tad + r_tad)
return score
def recall_tad(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
"""
Compute recall based on anomalous segments
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: segment recall
"""
if max_segment == 0:
max_segment = get_max_segment(y_true=y_true, pos_label=pos_label)
score = tp_count(y_true, y_pred, pos_label=pos_label, max_segment=max_segment)
return score
def precision_tad(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
"""
Compute precision based on anomalous segments
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: segment precision
"""
if max_segment == 0:
max_segment = get_max_segment(y_true=y_true, pos_label=pos_label)
score = tp_count(y_pred, y_true, pos_label=pos_label, max_segment=max_segment)
return score
def tp_count(y_true: [int], y_pred: [int], max_segment: int = 0, pos_label: int = 1) -> float:
"""
Compute the segment score; swapping y_true and y_pred yields recall and precision, respectively.
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: score
"""
if len(y_true) != len(y_pred):
raise ValueError("y_true and y_pred should have the same length.")
neg_label = 1 - pos_label
position = 0
tp_list = []
if max_segment == 0:
raise ValueError("max segment length is 0")
while position < len(y_true):
if y_true[position] == neg_label:
position += 1
continue
elif y_true[position] == pos_label:
start = position
while position < len(y_true) and y_true[position] == pos_label and position - start < max_segment:
position += 1
end = position
true_window = [weight_line(i/(end-start)) for i in range(end-start)]
true_window = softmax(true_window)
pred_window = np.array(y_pred[start:end])
pred_window = np.where(pred_window == pos_label, true_window, 0)
tp_list.append(sum(pred_window))
else:
raise ValueError("label value must be 0 or 1")
score = sum(tp_list) / len(tp_list) if len(tp_list) > 0 else 0
return score
def weight_line(position: float) -> float:
"""
Assign a value to each point according to the weight curve
:param position: relative position of the point within the segment, in [0, 1]
:return: weight value
"""
if position < 0 or position > 1:
raise ValueError(f"point position in segment need between 0 and 1, {position} is error position")
sigma = 1 / (1 + math.exp(10*(position-0.5)))
return sigma
def softmax(x: [float]) -> [float]:
"""
Softmax function
:param x: data of one anomalous segment
:return: the segment after softmax
"""
ret = np.exp(x)/np.sum(np.exp(x), axis=0)
return ret.tolist()
def get_max_segment(y_true: [int], pos_label: int = 1) -> int:
"""
Get the length of the longest anomalous segment
:param y_true: ground truth labels
:param pos_label: value of the anomaly label
:return: maximum length
"""
max_num, i = 0, 0
neg_label = 1 - pos_label
while i < len(y_true):
if y_true[i] == neg_label:
i += 1
continue
elif y_true[i] == pos_label:
start = i
while i < len(y_true) and y_true[i] == pos_label:
i += 1
end = i
max_num = max(max_num, end-start)
else:
raise ValueError("label value must be 0 or 1")
return max_num
if __name__ == "__main__":
# y_true = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
# 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# y_pred = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
# 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
import pandas as pd
data = pd.read_csv("../records/2023-04-10_10-30-27/detection_result/MtadGatAtt_SWAT.csv")
y_true = data["true"].tolist()
y_pred = data["ftad"].tolist()
print(evaluate(y_true, y_pred))
# print(precision_tad(y_true, y_pred))
# print(recall_tad(y_true, y_pred))
# from sklearn.metrics import f1_score, precision_score, recall_score
# print(f1_score(y_true, y_pred))
# print(precision_score(y_true, y_pred))
# print(recall_score(y_true, y_pred))
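Independent of the CSV used in the __main__ block above, a hedged sketch with toy labels:

from evaluation.ftad import evaluate, get_max_segment

y_true = [0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0]
y_pred = [0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1]
print(get_max_segment(y_true))   # 4: length of the longest anomalous segment
print(evaluate(y_true, y_pred))  # segment-based F score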

10
evaluation/template.py Normal file
View File

@@ -0,0 +1,10 @@
def evaluate(y_true: [int], y_pred: [int]) -> float:
"""
Evaluation method
:param y_true: ground truth labels
:param y_pred: detected labels
:return: score, as a float
"""
return 0.0

169
main.py Normal file
View File

@@ -0,0 +1,169 @@
# -*- coding:utf-8 -*-
import configparser
from daemon import Daemon
import sys
from torch.cuda import is_available
from loguru import logger
from datetime import datetime
from utils import create_all_dataloader, train_and_test_model
import method
import gc
import traceback
import pandas
import os
class Config(configparser.ConfigParser):
def __init__(self, defaults=None):
configparser.ConfigParser.__init__(self, defaults=defaults)
def optionxform(self, optionstr):
return optionstr
def as_dict(self):
d = dict(self._sections)
for k in d:
d[k] = dict(d[k])
return d
class Main(Daemon):
def __init__(self, pidfile):
super(Main, self).__init__(pidfile=pidfile)
current = datetime.now()
self.start_time = current.strftime("%Y-%m-%d_%H-%M-%S")
if len(sys.argv) == 1 or sys.argv[1] == "start":
self.run()
elif sys.argv[1] == "stop":
self.stop()
elif sys.argv[1] == "daemon":
self.start()
else:
print("Input format error. Please input: python3 main.py start|stop|daemon")
sys.exit(0)
def run(self):
# Read parameters from the config file
cf = Config()
cf.read("./config.ini", encoding='utf8')
cf_dict = cf.as_dict()
# Read dataset names
dataset_names = cf.get("Dataset", "name")
datasets = dataset_names.split(",")
datasets = [name.strip() for name in datasets]
# Read model names
model_names = cf.get("Method", "name")
models = model_names.split(",")
models = [name.strip() for name in models]
# Read the preprocessing method name
preprocess_name = cf.get("Preprocess", "name")
# Read evaluation method names
evaluation_names = cf.get("Evaluation", "name")
evaluations = evaluation_names.split(",")
evaluations = [name.strip() for name in evaluations]
# Read paths of saved model parameters
model_path = cf_dict["ModelPath"]
# Read training parameters
train = cf.getboolean("BaseParameters", "train")
epochs = cf.getint("BaseParameters", "epochs")
batch_size = cf.getint("BaseParameters", "batch_size")
learning_rate = cf.getfloat("BaseParameters", "learning_rate")
device = cf.get("BaseParameters", "device")
if device == "auto":
device = 'cuda:0' if is_available() else 'cpu'
# Read custom parameters
customs = cf_dict["CustomParameters"]
# Create the record directories for this experiment
os.makedirs(f"./records", exist_ok=True)
os.makedirs(f"./records/{self.start_time}", exist_ok=True)
os.makedirs(f"./records/{self.start_time}/detection_result", exist_ok=True)
os.makedirs(f"./records/{self.start_time}/model", exist_ok=True)
# Initialize logging
logger.add(f"./records/{self.start_time}/log",
level='DEBUG',
format='{time:YYYY-MM-DD HH:mm:ss} - {level} - {file} - {line} - {message}',
rotation="100 MB")
# Core routine
self.core(models, datasets, preprocess_name, evaluations, model_path, train, epochs, batch_size, learning_rate,
device, customs)
logger.info(f"实验结束,关闭进程")
def core(self, models: [str], datasets: [str], preprocess_name: str, evaluations: [str], model_path: {}, train: bool,
epochs: int, batch_size: int, learning_rate: float, device: str, customs: {}):
"""
Initialize the datasets and models, then start training and testing
:param models: names of the models to train; may contain several
:param datasets: names of the datasets to use; may contain several
:param preprocess_name: name of the preprocessing method
:param evaluations: names of the evaluation methods; may contain several
:param model_path: paths of model parameters to load; may contain several
:param train: whether to train; if False, the models are only tested
:param epochs: total number of training epochs
:param batch_size: batch size
:param learning_rate: learning rate
:param device: device
:param customs: custom parameters
"""
logger.info(f"加载数据集")
try:
# Initialize all datasets
all_dataloader = create_all_dataloader(datasets=datasets, input_size=int(customs["input_size"]),
output_size=int(customs["output_size"]), step=int(customs["step"]),
batch_size=batch_size, time_index=customs["time_index"] == "true",
del_column_name=customs["del_column_name"] == "true",
preprocess_name=preprocess_name)
except RuntimeError:
logger.error(traceback.format_exc())
return
# Start training and testing
for model_name in models:
try:
logger.info(f"------------华丽丽的分界线:{model_name} 实验开始------------")
for i in range(len(all_dataloader)):
dataloader = all_dataloader[i]
all_score = {}
for sub_dataloader in dataloader:
dataset_name = sub_dataloader["dataset_name"]
normal_dataloader = sub_dataloader["normal"]
attack_dataloader = sub_dataloader["attack"]
logger.info(f"初始化模型 {model_name}")
model = eval(f"method.{model_name}.Model")(customs=customs, dataloader=normal_dataloader)
model = model.to(device)
logger.info(f"模型初始化完成")
pth_name = model_path[f"{model_name}_{dataset_name}"] if f"{model_name}_{dataset_name}" \
in model_path else None
best_score, best_detection = train_and_test_model(start_time=self.start_time, epochs=epochs,
normal_dataloader=normal_dataloader,
attack_dataloader=attack_dataloader,
model=model, evaluations=evaluations,
device=device, lr=learning_rate,
model_path=pth_name, train=train)
# Save the labels of the best detection result as a CSV file
best_detection = pandas.DataFrame(best_detection)
best_detection.to_csv(f"./records/{self.start_time}/detection_result/{model_name}_{dataset_name}.csv", index=False)
for evaluation_name in evaluations:
if evaluation_name not in all_score:
all_score[evaluation_name] = []
all_score[evaluation_name].append(best_score[evaluation_name])
gc.collect()
logger.info(f"------------------------")
logger.info(f"{model_name} / {datasets[i]} 实验完毕")
for evaluation_name in all_score:
logger.info(f"{evaluation_name}: {'{:.3f}'.format(sum(all_score[evaluation_name]) / len(all_score[evaluation_name]))}")
logger.info(f"------------------------")
logger.info(f"------------华丽丽的分界线:{model_name} 实验结束------------")
except RuntimeError:
logger.error(traceback.format_exc())
return
if __name__ == '__main__':
pidpath = "/tmp/command_detection.pid"
app = Main(pidpath)

View File

@@ -0,0 +1,305 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
from math import sqrt
import torch.utils.data as tud
class PositionalEmbedding(nn.Module):
def __init__(self, d_model, max_len=5000):
super(PositionalEmbedding, self).__init__()
# Compute the positional encodings once in log space.
pe = torch.zeros(max_len, d_model).float()
pe.requires_grad = False
position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0)
self.register_buffer('pe', pe)
def forward(self, x):
return self.pe[:, :x.size(1)]
class TokenEmbedding(nn.Module):
def __init__(self, c_in, d_model):
super(TokenEmbedding, self).__init__()
padding = 1 if torch.__version__ >= '1.5.0' else 2
self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
kernel_size=3, padding=padding, padding_mode='circular', bias=False)
for m in self.modules():
if isinstance(m, nn.Conv1d):
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
def forward(self, x):
x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
return x
class DataEmbedding(nn.Module):
def __init__(self, c_in, d_model, dropout=0.0):
super(DataEmbedding, self).__init__()
self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
self.dropout = nn.Dropout(p=dropout)
def forward(self, x):
x = self.value_embedding(x) + self.position_embedding(x)
return self.dropout(x)
class TriangularCausalMask():
def __init__(self, B, L, device="cpu"):
mask_shape = [B, 1, L, L]
with torch.no_grad():
self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
@property
def mask(self):
return self._mask
class AnomalyAttention(nn.Module):
def __init__(self, win_size, mask_flag=True, scale=None, attention_dropout=0.0, output_attention=False):
super(AnomalyAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
window_size = win_size
self.distances = torch.zeros((window_size, window_size)).cuda()
for i in range(window_size):
for j in range(window_size):
self.distances[i][j] = abs(i - j)
def forward(self, queries, keys, values, sigma, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
scale = self.scale or 1. / sqrt(E)
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
if attn_mask is None:
attn_mask = TriangularCausalMask(B, L, device=queries.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
attn = scale * scores
sigma = sigma.transpose(1, 2) # B L H -> B H L
window_size = attn.shape[-1]
sigma = torch.sigmoid(sigma * 5) + 1e-5
sigma = torch.pow(3, sigma) - 1
sigma = sigma.unsqueeze(-1).repeat(1, 1, 1, window_size) # B H L L
prior = self.distances.unsqueeze(0).unsqueeze(0).repeat(sigma.shape[0], sigma.shape[1], 1, 1).cuda()
prior = 1.0 / (math.sqrt(2 * math.pi) * sigma) * torch.exp(-prior ** 2 / 2 / (sigma ** 2))
series = self.dropout(torch.softmax(attn, dim=-1))
V = torch.einsum("bhls,bshd->blhd", series, values)
if self.output_attention:
return (V.contiguous(), series, prior, sigma)
else:
return (V.contiguous(), None)
class AttentionLayer(nn.Module):
def __init__(self, attention, d_model, n_heads, d_keys=None,
d_values=None):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (d_model // n_heads)
d_values = d_values or (d_model // n_heads)
self.norm = nn.LayerNorm(d_model)
self.inner_attention = attention
self.query_projection = nn.Linear(d_model,
d_keys * n_heads)
self.key_projection = nn.Linear(d_model,
d_keys * n_heads)
self.value_projection = nn.Linear(d_model,
d_values * n_heads)
self.sigma_projection = nn.Linear(d_model,
n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
x = queries
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
sigma = self.sigma_projection(x).view(B, L, H)
out, series, prior, sigma = self.inner_attention(
queries,
keys,
values,
sigma,
attn_mask
)
out = out.view(B, L, -1)
return self.out_projection(out), series, prior, sigma
class EncoderLayer(nn.Module):
def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
super(EncoderLayer, self).__init__()
d_ff = d_ff or 4 * d_model
self.attention = attention
self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.dropout = nn.Dropout(dropout)
self.activation = F.relu if activation == "relu" else F.gelu
def forward(self, x, attn_mask=None):
new_x, attn, mask, sigma = self.attention(
x, x, x,
attn_mask=attn_mask
)
x = x + self.dropout(new_x)
y = x = self.norm1(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
y = self.dropout(self.conv2(y).transpose(-1, 1))
return self.norm2(x + y), attn, mask, sigma
class Encoder(nn.Module):
def __init__(self, attn_layers, norm_layer=None):
super(Encoder, self).__init__()
self.attn_layers = nn.ModuleList(attn_layers)
self.norm = norm_layer
def forward(self, x, attn_mask=None):
# x [B, L, D]
series_list = []
prior_list = []
sigma_list = []
for attn_layer in self.attn_layers:
x, series, prior, sigma = attn_layer(x, attn_mask=attn_mask)
series_list.append(series)
prior_list.append(prior)
sigma_list.append(sigma)
if self.norm is not None:
x = self.norm(x)
return x, series_list, prior_list, sigma_list
class Model(nn.Module):
    def __init__(self, customs: dict, dataloader: tud.DataLoader):
super(Model, self).__init__()
win_size = int(customs["input_size"])
enc_in = c_out = dataloader.dataset.train_inputs.shape[-1]
d_model = 512
n_heads = 8
e_layers = 3
d_ff = 512
dropout = 0.0
activation = 'gelu'
output_attention = True
self.k = 3
self.win_size = win_size
self.name = "AnomalyTransformer"
# Encoding
self.embedding = DataEmbedding(enc_in, d_model, dropout)
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
AttentionLayer(
AnomalyAttention(win_size, False, attention_dropout=dropout, output_attention=output_attention),
d_model, n_heads),
d_model,
d_ff,
dropout=dropout,
activation=activation
) for l in range(e_layers)
],
norm_layer=torch.nn.LayerNorm(d_model)
)
self.projection = nn.Linear(d_model, c_out, bias=True)
def forward(self, x):
enc_out = self.embedding(x)
enc_out, series, prior, sigmas = self.encoder(enc_out)
enc_out = self.projection(enc_out)
return enc_out, series, prior, sigmas
@staticmethod
def my_kl_loss(p, q):
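        # Element-wise KL divergence KL(p || q) with 1e-4 smoothing, summed over the last (key)
        # dimension and averaged over the attention heads, giving one value per time position for each sample.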
res = p * (torch.log(p + 0.0001) - torch.log(q + 0.0001))
return torch.mean(torch.sum(res, dim=-1), dim=1)
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
output, series, prior, _ = self.forward(x)
series_loss = 0.0
prior_loss = 0.0
for u in range(len(prior)):
series_loss += (torch.mean(self.my_kl_loss(series[u], (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)).detach())) +
torch.mean(self.my_kl_loss((prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)).detach(), series[u])))
prior_loss += (torch.mean(self.my_kl_loss((prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)), series[u].detach())) +
torch.mean(self.my_kl_loss(series[u].detach(), (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)))))
series_loss = series_loss / len(prior)
prior_loss = prior_loss / len(prior)
rec_loss = nn.MSELoss()(output, x)
loss1 = rec_loss - self.k * series_loss
loss2 = rec_loss + self.k * prior_loss
# Minimax strategy
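        # loss1 (prior detached) pushes the series association away from the prior, while loss2
        # (series detached) pulls the prior towards the series; both keep the reconstruction term.
        # The two backward passes accumulate gradients for the single optimizer.step() in the training loop.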
loss1.backward(retain_graph=True)
loss2.backward()
return loss1.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
temperature = 50
output, series, prior, _ = self.forward(x)
        loss = torch.mean(nn.MSELoss(reduction='none')(x, output), dim=-1)
series_loss = 0.0
prior_loss = 0.0
for u in range(len(prior)):
if u == 0:
series_loss = self.my_kl_loss(series[u], (
prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)).detach()) * temperature
prior_loss = self.my_kl_loss(
(prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)),
series[u].detach()) * temperature
else:
series_loss += self.my_kl_loss(series[u], (
prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)).detach()) * temperature
prior_loss += self.my_kl_loss(
(prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)),
series[u].detach()) * temperature
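        # Association-based anomaly score: the softmax of the negative, temperature-scaled
        # association discrepancy weights the per-point reconstruction error, and the result is
        # averaged over the window to give one score per sample.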
metric = torch.softmax((-series_loss - prior_loss), dim=-1)
cri = metric * loss
cri = cri.mean(dim=-1)
return cri, None

414
method/MtadGat.py Normal file
View File

@@ -0,0 +1,414 @@
"""
Third-party implementation of MTAD-GAT found on GitHub.
The original code used torch.empty, which frequently caused the loss to become NaN, so it was replaced with torch.zeros.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as tud
class ConvLayer(nn.Module):
"""1-D Convolution layer to extract high-level features of each time-series input
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param kernel_size: size of kernel to use in the convolution operation
"""
def __init__(self, n_features, kernel_size=7):
super(ConvLayer, self).__init__()
self.padding = nn.ConstantPad1d((kernel_size - 1) // 2, 0.0)
self.conv = nn.Conv1d(in_channels=n_features, out_channels=n_features, kernel_size=kernel_size)
self.relu = nn.ReLU()
def forward(self, x):
x = x.permute(0, 2, 1)
x = self.padding(x)
x = self.relu(self.conv(x))
return x.permute(0, 2, 1) # Permute back
class FeatureAttentionLayer(nn.Module):
"""Single Graph Feature/Spatial Attention Layer
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True):
super(FeatureAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.embed_dim = embed_dim if embed_dim is not None else window_size
self.use_gatv2 = use_gatv2
self.num_nodes = n_features
self.use_bias = use_bias
# Because linear transformation is done after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * window_size
a_input_dim = self.embed_dim
else:
lin_input_dim = window_size
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
# self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
self.a = nn.Parameter(torch.zeros((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
# self.bias = nn.Parameter(torch.empty(n_features, n_features))
self.bias = nn.Parameter(torch.zeros(n_features, n_features))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For feature attention we represent a node as the values of a particular feature across all timestamps
x = x.permute(0, 2, 1)
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, k, k, 2*window_size)
a_input = self.leakyrelu(self.lin(a_input)) # (b, k, k, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, k, k, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, k, k, embed_dim)
a_input = self._make_attention_input(Wx) # (b, k, k, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, k, k, 1)
if self.use_bias:
e += self.bias
# Attention weights
attention = torch.softmax(e, dim=2)
attention = torch.dropout(attention, self.dropout, train=self.training)
# Computing new node features using the attention
h = self.sigmoid(torch.matmul(attention, x))
return h.permute(0, 2, 1)
def _make_attention_input(self, v):
"""Preparing the feature attention mechanism.
Creating matrix with all possible combinations of concatenations of node.
Each node consists of all values of that node within the window
v1 || v1,
...
v1 || vK,
v2 || v1,
...
v2 || vK,
...
...
vK || v1,
...
vK || vK,
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2) # (b, K*K, 2*window_size)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.window_size)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class TemporalAttentionLayer(nn.Module):
"""Single Graph Temporal Attention Layer
:param n_features: number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True):
super(TemporalAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.use_gatv2 = use_gatv2
self.embed_dim = embed_dim if embed_dim is not None else n_features
self.num_nodes = window_size
self.use_bias = use_bias
# Because linear transformation is performed after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * n_features
a_input_dim = self.embed_dim
else:
lin_input_dim = n_features
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
# self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
self.a = nn.Parameter(torch.zeros((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
# self.bias = nn.Parameter(torch.empty(window_size, window_size))
self.bias = nn.Parameter(torch.zeros(window_size, window_size))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For temporal attention a node is represented as all feature values at a specific timestamp
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, n, n, 2*n_features)
a_input = self.leakyrelu(self.lin(a_input)) # (b, n, n, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, n, n, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, n, n, embed_dim)
a_input = self._make_attention_input(Wx) # (b, n, n, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, n, n, 1)
if self.use_bias:
e += self.bias # (b, n, n, 1)
# Attention weights
attention = torch.softmax(e, dim=2)
attention = torch.dropout(attention, self.dropout, train=self.training)
h = self.sigmoid(torch.matmul(attention, x)) # (b, n, k)
return h
def _make_attention_input(self, v):
"""Preparing the temporal attention mechanism.
Creating matrix with all possible combinations of concatenations of node values:
(v1, v2..)_t1 || (v1, v2..)_t1
(v1, v2..)_t1 || (v1, v2..)_t2
...
...
(v1, v2..)_tn || (v1, v2..)_t1
(v1, v2..)_tn || (v1, v2..)_t2
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.n_features)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class GRULayer(nn.Module):
"""Gated Recurrent Unit (GRU) Layer
:param in_dim: number of input features
:param hid_dim: hidden size of the GRU
:param n_layers: number of layers in GRU
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(GRULayer, self).__init__()
self.hid_dim = hid_dim
self.n_layers = n_layers
self.dropout = 0.0 if n_layers == 1 else dropout
self.gru = nn.GRU(in_dim, hid_dim, num_layers=n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
out, h = self.gru(x)
out, h = out[-1, :, :], h[-1, :, :] # Extracting from last layer
return out, h
class RNNDecoder(nn.Module):
"""GRU-based Decoder network that converts latent vector into output
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(RNNDecoder, self).__init__()
self.in_dim = in_dim
self.dropout = 0.0 if n_layers == 1 else dropout
self.rnn = nn.GRU(in_dim, hid_dim, n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
decoder_out, _ = self.rnn(x)
return decoder_out
class ReconstructionModel(nn.Module):
"""Reconstruction Model
:param window_size: length of the input sequence
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param in_dim: number of output features
:param dropout: dropout rate
"""
def __init__(self, window_size, in_dim, hid_dim, out_dim, n_layers, dropout):
super(ReconstructionModel, self).__init__()
self.window_size = window_size
self.decoder = RNNDecoder(in_dim, hid_dim, n_layers, dropout)
self.fc = nn.Linear(hid_dim, out_dim)
def forward(self, x):
# x will be last hidden state of the GRU layer
h_end = x
h_end_rep = h_end.repeat_interleave(self.window_size, dim=1).view(x.size(0), self.window_size, -1)
decoder_out = self.decoder(h_end_rep)
out = self.fc(decoder_out)
return out
class Forecasting_Model(nn.Module):
"""Forecasting model (fully-connected network)
:param in_dim: number of input features
:param hid_dim: hidden size of the FC network
:param out_dim: number of output features
:param n_layers: number of FC layers
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, out_dim, n_layers, dropout):
super(Forecasting_Model, self).__init__()
layers = [nn.Linear(in_dim, hid_dim)]
for _ in range(n_layers - 1):
layers.append(nn.Linear(hid_dim, hid_dim))
layers.append(nn.Linear(hid_dim, out_dim))
self.layers = nn.ModuleList(layers)
self.dropout = nn.Dropout(dropout)
self.relu = nn.ReLU()
def forward(self, x):
for i in range(len(self.layers) - 1):
x = self.relu(self.layers[i](x))
x = self.dropout(x)
return self.layers[-1](x)
class Model(nn.Module):
""" MTAD_GAT model class.
:param n_features: Number of input features
:param window_size: Length of the input sequence
:param out_dim: Number of features to output
:param kernel_size: size of kernel to use in the 1-D convolution
:param feat_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in feat-oriented GAT layer
:param time_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in time-oriented GAT layer
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param gru_n_layers: number of layers in the GRU layer
:param gru_hid_dim: hidden dimension in the GRU layer
:param forecast_n_layers: number of layers in the FC-based Forecasting Model
:param forecast_hid_dim: hidden dimension in the FC-based Forecasting Model
:param recon_n_layers: number of layers in the GRU-based Reconstruction Model
:param recon_hid_dim: hidden dimension in the GRU-based Reconstruction Model
:param dropout: dropout rate
    :param alpha: negative slope used in the leaky ReLU activation function
"""
    def __init__(self, customs: dict, dataloader: tud.DataLoader):
super(Model, self).__init__()
n_features = dataloader.dataset.train_inputs.shape[-1]
window_size = int(customs["input_size"])
out_dim = n_features
kernel_size = 7
feat_gat_embed_dim = None
time_gat_embed_dim = None
use_gatv2 = True
gru_n_layers = 1
gru_hid_dim = 150
forecast_n_layers = 1
forecast_hid_dim = 150
recon_n_layers = 1
recon_hid_dim = 150
dropout = 0.2
alpha = 0.2
self.name = "MtadGat"
self.conv = ConvLayer(n_features, kernel_size)
self.feature_gat = FeatureAttentionLayer(
n_features, window_size, dropout, alpha, feat_gat_embed_dim, use_gatv2)
self.temporal_gat = TemporalAttentionLayer(n_features, window_size, dropout, alpha, time_gat_embed_dim,
use_gatv2)
self.gru = GRULayer(3 * n_features, gru_hid_dim, gru_n_layers, dropout)
self.forecasting_model = Forecasting_Model(
gru_hid_dim, forecast_hid_dim, out_dim, forecast_n_layers, dropout)
self.recon_model = ReconstructionModel(window_size, gru_hid_dim, recon_hid_dim, out_dim, recon_n_layers,
dropout)
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
x = self.conv(x)
h_feat = self.feature_gat(x)
h_temp = self.temporal_gat(x)
h_cat = torch.cat([x, h_feat, h_temp], dim=2) # (b, n, 3k)
_, h_end = self.gru(h_cat)
h_end = h_end.view(x.shape[0], -1) # Hidden state for last timestamp
predictions = self.forecasting_model(h_end)
recons = self.recon_model(h_end)
return predictions, recons
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
if preds.ndim == 3:
preds = preds.squeeze(1)
if y_true.ndim == 3:
y_true = y_true.squeeze(1)
forecast_criterion = nn.MSELoss()
recon_criterion = nn.MSELoss()
forecast_loss = torch.sqrt(forecast_criterion(y_true, preds))
recon_loss = torch.sqrt(recon_criterion(x, recons))
loss = forecast_loss + recon_loss
loss.backward()
return loss.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
score = F.pairwise_distance(recons.reshape(recons.size(0), -1), x.reshape(x.size(0), -1)) + \
F.pairwise_distance(y_true.reshape(y_true.size(0), -1), preds.reshape(preds.size(0), -1))
return score, None

647
method/MtadGatAtt.py Normal file
View File

@@ -0,0 +1,647 @@
import torch
import torch.nn as nn
from math import sqrt
import torch.nn.functional as F
import numpy as np
import torch.utils.data as tud
class ConvLayer(nn.Module):
"""1-D Convolution layer to extract high-level features of each time-series input
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param kernel_size: size of kernel to use in the convolution operation
"""
def __init__(self, n_features, kernel_size=7):
super(ConvLayer, self).__init__()
self.padding = nn.ConstantPad1d((kernel_size - 1) // 2, 0.0)
self.conv = nn.Conv1d(in_channels=n_features, out_channels=n_features, kernel_size=kernel_size)
self.relu = nn.ReLU()
def forward(self, x):
x = x.permute(0, 2, 1)
x = self.padding(x)
x = self.relu(self.conv(x))
return x.permute(0, 2, 1) # Permute back
class FeatureAttentionLayer(nn.Module):
"""Single Graph Feature/Spatial Attention Layer
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True,
use_softmax=True):
super(FeatureAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.embed_dim = embed_dim if embed_dim is not None else window_size
self.use_gatv2 = use_gatv2
self.num_nodes = n_features
self.use_bias = use_bias
self.use_softmax = use_softmax
# Because linear transformation is done after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * window_size
a_input_dim = self.embed_dim
else:
lin_input_dim = window_size
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
self.bias = nn.Parameter(torch.ones(n_features, n_features))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For feature attention we represent a node as the values of a particular feature across all timestamps
x = x.permute(0, 2, 1)
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, k, k, 2*window_size)
a_input = self.leakyrelu(self.lin(a_input)) # (b, k, k, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, k, k, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, k, k, embed_dim)
a_input = self._make_attention_input(Wx) # (b, k, k, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, k, k, 1)
if self.use_bias:
e += self.bias
# Attention weights
if self.use_softmax:
e = torch.softmax(e, dim=2)
attention = torch.dropout(e, self.dropout, train=self.training)
# Computing new node features using the attention
h = self.sigmoid(torch.matmul(attention, x))
return h.permute(0, 2, 1)
def _make_attention_input(self, v):
"""Preparing the feature attention mechanism.
Creating matrix with all possible combinations of concatenations of node.
Each node consists of all values of that node within the window
v1 || v1,
...
v1 || vK,
v2 || v1,
...
v2 || vK,
...
...
vK || v1,
...
vK || vK,
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2) # (b, K*K, 2*window_size)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.window_size)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class TemporalAttentionLayer(nn.Module):
"""Single Graph Temporal Attention Layer
:param n_features: number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True,
use_softmax=True):
super(TemporalAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.use_gatv2 = use_gatv2
self.embed_dim = embed_dim if embed_dim is not None else n_features
self.num_nodes = window_size
self.use_bias = use_bias
self.use_softmax = use_softmax
# Because linear transformation is performed after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * n_features
a_input_dim = self.embed_dim
else:
lin_input_dim = n_features
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
self.bias = nn.Parameter(torch.ones(window_size, window_size))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For temporal attention a node is represented as all feature values at a specific timestamp
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, n, n, 2*n_features)
a_input = self.leakyrelu(self.lin(a_input)) # (b, n, n, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, n, n, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, n, n, embed_dim)
a_input = self._make_attention_input(Wx) # (b, n, n, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, n, n, 1)
if self.use_bias:
e += self.bias # (b, n, n, 1)
# Attention weights
if self.use_softmax:
e = torch.softmax(e, dim=2)
attention = torch.dropout(e, self.dropout, train=self.training)
h = self.sigmoid(torch.matmul(attention, x)) # (b, n, k)
return h
def _make_attention_input(self, v):
"""Preparing the temporal attention mechanism.
Creating matrix with all possible combinations of concatenations of node values:
(v1, v2..)_t1 || (v1, v2..)_t1
(v1, v2..)_t1 || (v1, v2..)_t2
...
...
(v1, v2..)_tn || (v1, v2..)_t1
(v1, v2..)_tn || (v1, v2..)_t2
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.n_features)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class FullAttention(nn.Module):
def __init__(self, mask_flag=True, scale=None, attention_dropout=0.1, output_attention=False):
super(FullAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
self.relu_q = nn.ReLU()
self.relu_k = nn.ReLU()
@staticmethod
def TriangularCausalMask(B, L, S, device='cpu'):
mask_shape = [B, 1, L, S]
with torch.no_grad():
mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1)
return mask.to(device)
def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)  # scaling factor; defaults to 1/sqrt(d_k)
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
if attn_mask is None:
attn_mask = self.TriangularCausalMask(B, L, S, device=queries.device)
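            # Note: masked positions are filled with 0 rather than -inf, so after the softmax they still
            # receive non-zero weight (proportional to exp(0)); this differs from the usual causal masking
            # that removes future positions entirely.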
scores.masked_fill_(attn_mask, 0)
A = self.dropout(torch.softmax(scale * scores, dim=-1))
V = torch.einsum("bhls,bshd->blhd", A, values)
# queries = self.relu_q(queries)
# keys = self.relu_k(keys)
# KV = torch.einsum("blhe,bshe->bhls", keys, values)
# A = self.dropout(scale * KV)
# V = torch.einsum("bshd,bhls->blhd", queries, A)
if self.output_attention:
return (V.contiguous(), A)
else:
return (V.contiguous(), None)
class ProbAttention(nn.Module):
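    # Sparse attention variant adapted from Informer's ProbSparse attention, kept here as an
    # alternative to FullAttention (AttentionBlock below instantiates FullAttention by default).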
def __init__(self, mask_flag=True, factor=2, scale=None, attention_dropout=0.1, output_attention=False):
super(ProbAttention, self).__init__()
self.factor = factor
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
@staticmethod
def ProbMask(B, H, D, index, scores, device='cpu'):
_mask = torch.ones(D, scores.shape[-2], dtype=torch.bool).triu(1)
_mask_ex = _mask[None, None, :].expand(B, H, D, scores.shape[-2])
indicator = _mask_ex.transpose(-2, -1)[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :].transpose(-2, -1)
mask = indicator.view(scores.shape)
return mask.to(device)
def _prob_KV(self, K, V, sample_v, n_top): # n_top: c*ln(L_q)
# Q [B, H, L, D]
B, H, L, E_V = V.shape
_, _, _, E_K = K.shape
# calculate the sampled K_V
V_expand = V.transpose(-2, -1).unsqueeze(-2).expand(B, H, E_V, E_K, L)
index_sample = torch.randint(E_V, (E_K, sample_v)) # real U = U_part(factor*ln(L_k))*L_q
V_sample = V_expand[:, :, torch.arange(E_V).unsqueeze(1), index_sample, :]
K_V_sample = torch.matmul(K.transpose(-2, -1).unsqueeze(-2), V_sample.transpose(-2, -1)).squeeze()
# find the Top_k query with sparisty measurement
M = K_V_sample.max(-1)[0] - torch.div(K_V_sample.sum(-1), E_V)
M_top = M.topk(n_top, sorted=False)[1]
# use the reduced Q to calculate Q_K
V_reduce = V.transpose(-2, -1)[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
M_top, :].transpose(-2, -1) # factor*ln(L_q)
K_V = torch.matmul(K.transpose(-2, -1), V_reduce) # factor*ln(L_q)*L_k
#
return K_V, M_top
def _get_initial_context(self, V, L_Q):
B, H, L_V, D = V.shape
if not self.mask_flag:
# V_sum = V.sum(dim=-2)
V_sum = V.mean(dim=-2)
contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
else: # use mask
assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
contex = V.cumsum(dim=-2)
return contex
def _update_context(self, context_in, Q, scores, index, D_K, attn_mask):
B, H, L, D_Q = Q.shape
if self.mask_flag:
attn_mask = self.ProbMask(B, H, D_K, index, scores, device=Q.device)
scores.masked_fill_(attn_mask, -np.inf)
attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
context_in.transpose(-2, -1)[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :] = torch.matmul(Q, attn).type_as(context_in).transpose(-2, -1)
if self.output_attention:
attns = (torch.ones([B, H, D_K, D_K]) / D_K).type_as(attn)
attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
return (context_in, attns)
else:
return (context_in, None)
def forward(self, queries, keys, values, attn_mask):
# B, L_Q, H, D = queries.shape
# _, L_K, _, _ = keys.shape
B, L, H, D_K = keys.shape
_, _, _, D_V = values.shape
queries = queries.transpose(2, 1)
keys = keys.transpose(2, 1)
values = values.transpose(2, 1)
U_part = self.factor * np.ceil(np.log(D_V)).astype('int').item() # c*ln(L_k)
u = self.factor * np.ceil(np.log(D_K)).astype('int').item() # c*ln(L_q)
U_part = U_part if U_part < D_V else D_V
u = u if u < D_K else D_K
scores_top, index = self._prob_KV(keys, values, sample_v=U_part, n_top=u)
# add scale factor
scale = self.scale or 1. / sqrt(D_K)
if scale is not None:
scores_top = scores_top * scale
# get the context
context = self._get_initial_context(queries, L)
# update the context with selected top_k queries
context, attn = self._update_context(context, queries, scores_top, index, D_K, attn_mask)
return context.contiguous(), attn
class AttentionBlock(nn.Module):
def __init__(self, d_model, n_model, n_heads=8, d_keys=None, d_values=None):
super(AttentionBlock, self).__init__()
d_keys = d_keys or (d_model // n_heads)
d_values = d_values or (d_model // n_heads)
self.inner_attention = FullAttention()
# self.inner_attention = ProbAttention(device=device)
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Linear(d_model, d_values * n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def forward(self, queries, keys, values, attn_mask):
'''
Q: [batch_size, len_q, d_k]
K: [batch_size, len_k, d_k]
V: [batch_size, len_v(=len_k), d_v]
attn_mask: [batch_size, seq_len, seq_len]
'''
batch_size, len_q, _ = queries.shape
_, len_k, _ = keys.shape
queries = self.query_projection(queries).view(batch_size, len_q, self.n_heads, -1)
keys = self.key_projection(keys).view(batch_size, len_k, self.n_heads, -1)
values = self.value_projection(values).view(batch_size, len_k, self.n_heads, -1)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
out = out.view(batch_size, len_q, -1)
out = self.out_projection(out)
out = self.layer_norm(out)
return out, attn
class GRULayer(nn.Module):
"""Gated Recurrent Unit (GRU) Layer
:param in_dim: number of input features
:param hid_dim: hidden size of the GRU
:param n_layers: number of layers in GRU
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(GRULayer, self).__init__()
self.hid_dim = hid_dim
self.n_layers = n_layers
self.dropout = 0.0 if n_layers == 1 else dropout
self.gru = nn.GRU(in_dim, hid_dim, num_layers=n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
out, h = self.gru(x)
out, h = out[-1, :, :], h[-1, :, :] # Extracting from last layer
return out, h
class RNNDecoder(nn.Module):
"""GRU-based Decoder network that converts latent vector into output
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(RNNDecoder, self).__init__()
self.in_dim = in_dim
self.dropout = 0.0 if n_layers == 1 else dropout
self.rnn = nn.GRU(in_dim, hid_dim, n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
decoder_out, _ = self.rnn(x)
return decoder_out
class ReconstructionModel(nn.Module):
"""Reconstruction Model
:param window_size: length of the input sequence
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param in_dim: number of output features
:param dropout: dropout rate
"""
def __init__(self, window_size, in_dim, hid_dim, out_dim, n_layers, dropout):
super(ReconstructionModel, self).__init__()
self.window_size = window_size
self.decoder = RNNDecoder(in_dim, hid_dim, n_layers, dropout)
self.fc = nn.Linear(hid_dim, out_dim)
def forward(self, x):
# x will be last hidden state of the GRU layer
h_end = x
h_end_rep = h_end.repeat_interleave(self.window_size, dim=1).view(x.size(0), self.window_size, -1)
decoder_out = self.decoder(h_end_rep)
out = self.fc(decoder_out)
return out
class Forecasting_Model(nn.Module):
"""Forecasting model (fully-connected network)
:param in_dim: number of input features
:param hid_dim: hidden size of the FC network
:param out_dim: number of output features
:param n_layers: number of FC layers
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, out_dim, n_layers, dropout):
super(Forecasting_Model, self).__init__()
layers = [nn.Linear(in_dim, hid_dim)]
for _ in range(n_layers - 1):
layers.append(nn.Linear(hid_dim, hid_dim))
layers.append(nn.Linear(hid_dim, out_dim))
self.layers = nn.ModuleList(layers)
self.dropout = nn.Dropout(dropout)
self.relu = nn.ReLU()
def forward(self, x):
for i in range(len(self.layers) - 1):
x = self.relu(self.layers[i](x))
x = self.dropout(x)
return self.layers[-1](x)
class Model(nn.Module):
""" MTAD_GAT model class.
:param n_features: Number of input features
:param window_size: Length of the input sequence
:param out_dim: Number of features to output
:param kernel_size: size of kernel to use in the 1-D convolution
:param feat_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in feat-oriented GAT layer
:param time_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in time-oriented GAT layer
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param gru_n_layers: number of layers in the GRU layer
:param gru_hid_dim: hidden dimension in the GRU layer
:param forecast_n_layers: number of layers in the FC-based Forecasting Model
:param forecast_hid_dim: hidden dimension in the FC-based Forecasting Model
:param recon_n_layers: number of layers in the GRU-based Reconstruction Model
:param recon_hid_dim: hidden dimension in the GRU-based Reconstruction Model
:param dropout: dropout rate
    :param alpha: negative slope used in the leaky ReLU activation function
"""
def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
super(Model, self).__init__()
n_features = dataloader.dataset.train_inputs.shape[-1]
window_size = int(customs["input_size"])
out_dim = n_features
kernel_size = 7
feat_gat_embed_dim = None
time_gat_embed_dim = None
use_gatv2 = True
gru_n_layers = 1
gru_hid_dim = 150
forecast_n_layers = 1
forecast_hid_dim = 150
recon_n_layers = 1
recon_hid_dim = 150
dropout = 0.2
alpha = 0.2
optimize = True
self.name = "MtadGatAtt"
self.optimize = optimize
use_softmax = not optimize
self.conv = ConvLayer(n_features, kernel_size)
self.feature_gat = FeatureAttentionLayer(
n_features, window_size, dropout, alpha, feat_gat_embed_dim, use_gatv2, use_softmax=use_softmax)
self.temporal_gat = TemporalAttentionLayer(n_features, window_size, dropout, alpha, time_gat_embed_dim,
use_gatv2, use_softmax=use_softmax)
self.forecasting_model = Forecasting_Model(
gru_hid_dim, forecast_hid_dim, out_dim, forecast_n_layers, dropout)
if optimize:
self.encode = AttentionBlock(3 * n_features, window_size)
self.encode_feature = nn.Linear(3 * n_features * window_size, gru_hid_dim)
self.decode_feature = nn.Linear(gru_hid_dim, n_features * window_size)
self.decode = AttentionBlock(n_features, window_size)
else:
self.gru = GRULayer(3 * n_features, gru_hid_dim, gru_n_layers, dropout)
self.recon_model = ReconstructionModel(window_size, gru_hid_dim, recon_hid_dim, out_dim, recon_n_layers,
dropout)
def forward(self, x):
x = self.conv(x)
h_feat = self.feature_gat(x)
h_temp = self.temporal_gat(x)
h_cat = torch.cat([x, h_feat, h_temp], dim=2) # (b, n, 3k)
if self.optimize:
h_end, _ = self.encode(h_cat, h_cat, h_cat, None)
h_end = self.encode_feature(h_end.reshape(h_end.size(0), -1))
else:
_, h_end = self.gru(h_cat)
h_end = h_end.view(x.shape[0], -1) # Hidden state for last timestamp
predictions = self.forecasting_model(h_end)
if self.optimize:
h_end = self.decode_feature(h_end)
h_end = h_end.reshape(x.shape[0], x.shape[1], x.shape[2])
recons, _ = self.decode(h_end, h_end, h_end, None)
else:
recons = self.recon_model(h_end)
return predictions, recons
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
if preds.ndim == 3:
preds = preds.squeeze(1)
if y_true.ndim == 3:
y_true = y_true.squeeze(1)
forecast_criterion = nn.MSELoss()
recon_criterion = nn.MSELoss()
forecast_loss = torch.sqrt(forecast_criterion(y_true, preds))
recon_loss = torch.sqrt(recon_criterion(x, recons))
loss = forecast_loss + recon_loss
loss.backward()
return loss.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
score = F.pairwise_distance(recons.reshape(recons.size(0), -1), x.reshape(x.size(0), -1)) + F.pairwise_distance(y_true.reshape(y_true.size(0), -1), preds.reshape(preds.size(0), -1))
return score, None
if __name__ == "__main__":
    # Quick forward-pass speed check. The original snippet used an older constructor signature
    # (n_features, window_size, out_dim, optimize, device); the current Model takes
    # (customs, dataloader), so a dummy dataset supplies the feature count here. The `optimize`
    # flag is now fixed inside __init__, so only that configuration is timed.
    from tqdm import tqdm
    import time
    class _DummyDataset(tud.Dataset):
        def __init__(self, window, features, n=8):
            self.train_inputs = torch.ones((n, window, features))
        def __len__(self):
            return self.train_inputs.shape[0]
        def __getitem__(self, idx):
            return self.train_inputs[idx]
    epoch = 10000
    batch_size = 1
    # device = 'cuda:1' if torch.cuda.is_available() else 'cpu'
    device = 'cpu'
    n_features = 52
    input_len_list = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300]
    for input_len in input_len_list:
        loader = tud.DataLoader(_DummyDataset(input_len, n_features), batch_size=batch_size)
        model = Model({"input_size": input_len}, loader).to(device)
        a = torch.ones((batch_size, input_len, n_features)).to(device)
        start = time.time()
        for _ in tqdm(range(epoch)):
            model(a)
        end = time.time()
        print(input_len, batch_size * epoch / (end - start))

3
method/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
from .MtadGat import Model
from .MtadGatAtt import Model
from .AnomalyTransformer import Model

50
method/template.py Normal file
View File

@@ -0,0 +1,50 @@
import torch.nn as nn
import torch.utils.data as tud
import torch
class Model(nn.Module):
def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
"""
        :param customs: Custom parameters, taken from the [CustomParameters] section of config.ini.
        :param dataloader: The dataloader built from the initialized dataset. Attributes or methods added in a custom
                           preprocessing file can be exposed to the model here. For example, if the model needs the
                           number of feature dimensions, use n_features = dataloader.dataset.train_inputs.shape[-1],
                           or define self.n_features = self.train_inputs.shape[-1] in the MyDataset class of the
                           preprocessing method and read it via n_features = dataloader.dataset.n_features.
"""
super(Model, self).__init__()
def forward(self, x):
"""
        :param x: Model input; in this tool it is the first of the three variables returned by __getitem__ of the MyDataset class.
        :return: Model output, which can be customized.
"""
return None
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
"""
        Compute the loss. Note: using libraries other than torch here will break the gradient graph, so use torch operations only.
        :param x: Input data.
        :param y_true: Ground-truth output data.
        :param epoch: Current epoch number.
        :param device: Device, cpu or cuda.
        :return: Loss value.
"""
        y_pred = self.forward(x)  # model output
        loss = torch.Tensor([1])  # placeholder, replace with your own loss
loss.backward()
return loss.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
"""
        Detection method; it may output either anomaly scores or concrete labels.
        If anomaly scores are returned, a threshold is selected automatically afterwards and points above it are
        labelled anomalous; if labels are returned, they are evaluated directly.
        :param x: Input data.
        :param y_true: Ground-truth output data.
        :param epoch: Current epoch number.
        :param device: Device, cpu or cuda.
        :return: score, label. To output anomaly scores, return score and set label to None; to output labels,
                 return label and set score to None. score is a torch Tensor of shape [batch_size];
                 label is a torch IntTensor of shape [batch_size].
"""
        y_pred = self.forward(x)  # model output
return None, None
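# Illustrative sketch (not part of the framework): a minimal concrete model following the interface
# documented above. It reconstructs the input window with a single linear layer and uses the
# per-window reconstruction error as the anomaly score; input shape is assumed to be [B, L, D].
class ExampleLinearModel(nn.Module):
    def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
        super(ExampleLinearModel, self).__init__()
        window = int(customs["input_size"])
        n_features = dataloader.dataset.train_inputs.shape[-1]
        self.name = "ExampleLinearModel"
        self.linear = nn.Linear(window * n_features, window * n_features)
    def forward(self, x):
        b, l, d = x.shape
        return self.linear(x.reshape(b, -1)).reshape(b, l, d)
    def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
        recon = self.forward(x)
        loss = nn.MSELoss()(recon, x)  # pure reconstruction objective; y_true is not used here
        loss.backward()
        return loss.item()
    def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
        recon = self.forward(x)
        score = torch.mean((recon - x) ** 2, dim=(1, 2))  # one score per window, shape [batch_size]
        return score, None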

1
preprocess/__init__.py Normal file
View File

@@ -0,0 +1 @@
from .standardization import MyDataset

119
preprocess/standardization.py Normal file
View File

@@ -0,0 +1,119 @@
from torch.utils.data import Dataset
from torch import float32, Tensor
from numpy import array, where
class MyDataset(Dataset):
def __init__(self, name: str, train_path: str = None, test_path: str = None, input_size: int = 1,
output_size: int = 1, step: int = 1, mode: str = 'train', time_index: bool = True,
del_column_name: bool = True):
"""
        Converts csv files into tensors in batches.
        :param name: Dataset name.
        :param train_path: Path to the training set.
        :param test_path: Path to the test set.
        :param input_size: Length of the input window.
        :param output_size: Length of the output window.
        :param step: Stride of the sliding window.
        :param mode: 'train' or 'test', selecting which split is served.
        :param time_index: True if the first column is a timestamp, False otherwise.
        :param del_column_name: Use True when the first row of the file contains column names.
"""
self.name = name
self.input_size = input_size
self.output_size = output_size
self.del_column_name = del_column_name
self.step = step
self.mode = mode
self.time_index = time_index
self.train_inputs, self.train_labels, self.train_outputs, self.test_inputs, self.test_labels, self.test_outputs\
= self.parse_data(train_path, test_path)
self.train_inputs = Tensor(self.train_inputs).to(float32) if self.train_inputs is not None else None
self.train_labels = Tensor(self.train_labels).to(float32) if self.train_labels is not None else None
self.train_outputs = Tensor(self.train_outputs).to(float32) if self.train_outputs is not None else None
self.test_inputs = Tensor(self.test_inputs).to(float32) if self.test_inputs is not None else None
self.test_labels = Tensor(self.test_labels).to(float32) if self.test_labels is not None else None
self.test_outputs = Tensor(self.test_outputs).to(float32) if self.test_outputs is not None else None
def parse_data(self, train_path: str = None, test_path: str = None):
if train_path is None and test_path is None:
raise ValueError("train_path is None and test_path is None.")
mean = None
deviation = None
train_data_input, train_label, train_data_output = None, None, None
test_data_input, test_label, test_data_output = None, None, None
        # Read the training data
if train_path:
train_data = []
train_label = []
with open(train_path, 'r', encoding='utf8') as f:
if self.del_column_name is True:
data = f.readlines()[1:]
else:
data = f.readlines()
train_data.extend([list(map(float, line.strip().split(','))) for line in data])
train_label.extend([0 for _ in data])
train_np = array(train_data)
if self.time_index:
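                # Fold the timestamp into seconds-of-day (mod 86400) so time becomes a daily-periodic
                # feature that is standardised like the other columns.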
train_np[:, 0] = train_np[:, 0] % 86400
            mean = train_np.mean(axis=0)  # compute the mean of each column
            deviation = train_np.std(axis=0)  # compute the standard deviation of each column
            deviation = where(deviation != 0, deviation, 1)
            train_np = (train_np - mean) / deviation  # standardize
train_data = train_np.tolist()
train_data_input, train_data_output, train_label = self.cut_data(train_data, train_label)
        # Read the test data
if test_path:
test_data = []
test_label = []
with open(test_path, 'r', encoding='utf8') as f:
if self.del_column_name is True:
data = f.readlines()[1:]
else:
data = f.readlines()
test_data.extend([list(map(float, line.strip().split(',')))[:-1] for line in data])
test_label.extend([int(line.strip().split(',')[-1]) for line in data])
test_np = array(test_data)
if self.time_index:
test_np[:, 0] = test_np[:, 0] % 86400
            # mean = test_np.mean(axis=0)  # compute the mean
            # deviation = test_np.std(axis=0)  # compute the standard deviation
            # deviation = where(deviation != 0, deviation, 1)
            test_np = (test_np - mean) / deviation  # standardize using the training-set statistics
test_data = test_np.tolist()
            # Automatically decide whether the labels need to be flipped. Anomalies are assumed to be
            # labelled 1; when more than half of the labels are 1, the labels are inverted.
if sum(test_label) > 0.5*len(test_label):
test_label = (1-array(test_label)).tolist()
test_data_input, test_data_output, test_label = self.cut_data(test_data, test_label)
return train_data_input, train_label, train_data_output, test_data_input, test_label, test_data_output
def cut_data(self, data: [[float]], label: [int]):
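        # Sliding-window slicing: each window of input_size points becomes one input sample, the
        # following output_size points are its target, the window label is the max of the target
        # labels (anomalous if any target point is anomalous), and the window start advances by
        # `step` each iteration.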
n = 0
input_data, output_data, anomaly_label = [], [], []
while n + self.input_size + self.output_size <= len(data):
input_data.append(data[n: n + self.input_size])
output_data.append(data[n + self.input_size: n + self.input_size + self.output_size])
anomaly_label.append([max(label[n + self.input_size: n + self.input_size + self.output_size])])
n = n + self.step
return input_data.copy(), output_data.copy(), anomaly_label.copy()
def __len__(self):
if self.mode == 'train':
return self.train_inputs.shape[0]
elif self.mode == 'test':
return self.test_inputs.shape[0]
def __getitem__(self, idx):
if self.mode == 'train':
return self.train_inputs[idx], self.train_labels[idx], self.train_outputs[idx]
elif self.mode == 'test':
return self.test_inputs[idx], self.test_labels[idx], self.test_outputs[idx]
if __name__ == "__main__":
    app = MyDataset('SWAT', train_path='../dataset/SWAT/train.csv', test_path='../dataset/SWAT/test.csv', input_size=3)
print(app)

61
preprocess/template.py Normal file
View File

@@ -0,0 +1,61 @@
"""
Template file: implement a custom preprocessing method by editing this file.
When finished, write the new file name into config.ini and add the file to the __init__.py of this directory
(note: utils.create_dataloader instantiates MyDataset from the chosen preprocessing module, so name the dataset class MyDataset).
"""
from torch.utils.data import Dataset
class DataSet(Dataset):
def __init__(self, train_path: str = None, test_path: str = None, input_size: int = 1, output_size: int = 1,
step: int = 1, mode: str = 'train', del_time: bool = True, del_column_name: bool = True,
reverse_label: bool = True):
"""
        Converts csv files into tensors in batches.
        Note: the following variables and methods are required.
        Variables: self.train_inputs, self.train_labels, self.train_outputs,
                   self.test_inputs, self.test_labels, self.test_outputs, self.mode
        Methods: __len__(), __getitem__()
        :param train_path: str. Path to the training set.
        :param test_path: str. Path to the test set.
        :param input_size: int. Length of the input window.
        :param output_size: int. Length of the output window.
        :param step: int. Stride of the sliding window.
        :param mode: str. 'train' or 'test', selecting which split is served.
        :param del_time: bool. True removes the timestamp column, False keeps it.
        :param del_column_name: bool. Use True when the first row of the file contains column names.
        :param reverse_label: bool. Swap labels 0 and 1; the convention is 0 for normal and 1 for anomalous, so use True if the original file does not follow it.
"""
self.mode = mode
        self.train_inputs = None  # training inputs, a Tensor of shape [N, L, D]: N = number of training samples, L = length of each sample (number of time points), D = number of feature dimensions
        self.train_labels = None  # training labels, a Tensor of shape [N, 1]
        self.train_outputs = None  # training targets, a Tensor of shape [N, L, D]
        self.test_inputs = None  # test inputs, a Tensor of shape [N, L, D]
        self.test_labels = None  # test labels, a Tensor of shape [N, 1]
        self.test_outputs = None  # test targets, a Tensor of shape [N, L, D]
def __len__(self):
"""
        Returns the length of the dataset.
        :return: Number of samples N in the training or test split.
"""
if self.mode == 'train':
return self.train_inputs.shape[0]
elif self.mode == 'test':
return self.test_inputs.shape[0]
def __getitem__(self, idx):
"""
        Fetches one sample.
        :param idx: Sample index.
        :return: The corresponding input data, label, and target output data.
"""
if self.mode == 'train':
return self.train_inputs[idx], self.train_labels[idx], self.train_outputs[idx]
elif self.mode == 'test':
return self.test_inputs[idx], self.test_labels[idx], self.test_outputs[idx]
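if __name__ == "__main__":
    # Illustrative smoke test (not part of the framework): fill the required fields with random
    # tensors of the documented shapes [N, L, D] and exercise the Dataset interface.
    import torch
    N, L, D = 16, 30, 8
    ds = DataSet()
    ds.train_inputs = torch.randn(N, L, D)
    ds.train_labels = torch.zeros(N, 1)
    ds.train_outputs = torch.randn(N, L, D)
    print(len(ds), [t.shape for t in ds[0]])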

View File

File diff suppressed because it is too large

8
requirements.txt Normal file
View File

@@ -0,0 +1,8 @@
eventlet==0.33.3
gevent==22.10.2
loguru==0.6.0
numpy==1.21.2
pandas==1.4.1
scikit_learn==1.2.2
torch==1.10.2
tqdm==4.63.1

318
utils.py Normal file
View File

@@ -0,0 +1,318 @@
import os
import numpy
import torch.nn as nn
import torch
import torch.utils.data as tud
import preprocess
import evaluation
from loguru import logger
import time
from tqdm import tqdm
def create_dataloader(dataset_name: str, input_size: int = 1, output_size: int = 1, step: int = 1, batch_size: int = 1,
time_index: bool = True, del_column_name: bool = True,
preprocess_name: str = "standardization") -> (tud.DataLoader, tud.DataLoader):
"""
    Build DataLoaders for a single dataset.
    :param dataset_name: Dataset name.
    :param input_size: Length of the input window.
    :param output_size: Length of the output window.
    :param step: Stride of the sliding window.
    :param batch_size: Batch size.
    :param time_index: True if the first column is a timestamp, False otherwise.
    :param del_column_name: Use True when the first row of the file contains column names.
    :param preprocess_name: Name of the preprocessing method.
    :return: Training DataLoader and test DataLoader.
"""
ds = eval(f"preprocess.{preprocess_name}.MyDataset")(name=dataset_name.replace("/", "-"),
train_path=f"./dataset/{dataset_name}/train.csv",
test_path=f"./dataset/{dataset_name}/test.csv",
input_size=input_size, output_size=output_size,
step=step, time_index=time_index,
del_column_name=del_column_name)
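    # The preprocessing module is resolved dynamically via the eval above: preprocess.{preprocess_name} must be
    # imported in preprocess/__init__.py and must define a class named MyDataset (see preprocess/template.py).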
normal_dl = tud.DataLoader(dataset=ds, batch_size=batch_size, shuffle=True)
ds.mode = "test"
attack_dl = tud.DataLoader(dataset=ds, batch_size=batch_size, shuffle=False)
return normal_dl, attack_dl
def create_all_dataloader(datasets: [str], input_size: int = 1, output_size: int = 1, step: int = 1,
batch_size: int = 1, time_index: bool = True, del_column_name: bool = True,
preprocess_name: str = "standardization") -> [{}]:
"""
    Build DataLoaders for all datasets.
    :param datasets: List of dataset names.
    :param input_size: Length of the input window.
    :param output_size: Length of the output window.
    :param step: Stride of the sliding window.
    :param batch_size: Batch size.
    :param time_index: True if the first column is a timestamp, False otherwise.
    :param del_column_name: Use True when the first row of the file contains column names.
    :param preprocess_name: Name of the preprocessing method.
    :return: The DataLoaders built for every dataset.
"""
all_dataloader = []
for dataset_name in datasets:
        logger.info(f'Start building dataloader {dataset_name}')
if "train.csv" in os.listdir(f"./dataset/{dataset_name}"):
normal_dl, attack_dl = create_dataloader(dataset_name=dataset_name, input_size=input_size,
output_size=output_size, step=step, batch_size=batch_size,
time_index=time_index, del_column_name=del_column_name,
preprocess_name=preprocess_name)
all_dataloader.append([{
'dataset_name': dataset_name,
'normal': normal_dl,
'attack': attack_dl
}])
else:
all_sub_dataloader = []
for sub_dataset_dir in os.listdir(f"./dataset/{dataset_name}"):
sub_dataset_name = f"{dataset_name}/{sub_dataset_dir}"
                normal_dl, attack_dl = create_dataloader(dataset_name=sub_dataset_name, input_size=input_size,
                                                         output_size=output_size, step=step, batch_size=batch_size,
                                                         time_index=time_index, del_column_name=del_column_name,
                                                         preprocess_name=preprocess_name)
all_sub_dataloader.append({
'dataset_name': sub_dataset_name.replace("/", "-"),
'normal': normal_dl,
'attack': attack_dl
})
all_dataloader.append(all_sub_dataloader)
        logger.info(f'Finished building dataloader {dataset_name}')
return all_dataloader
class EvaluationScore:
def __init__(self, evaluations: [str], attack=1):
"""
        Automatically selects thresholds and runs batch evaluation.
        :param evaluations: Names of the evaluation methods to use; each must be defined in the evaluation folder.
        :param attack: Label value used for anomalies, 0 or 1.
"""
self.time = 0
self.f1 = 0
self.f1_pa = 0
self.f_tad = 0
self.attack = attack
self.normal = 1 - attack
self._total_y_loss = None
self._total_label = None
self._total_pred_label = None
self.true_pred_df = None
self.true_pred_dict = None
self.evaluations = evaluations
self.scores = {}
def add_data(self, y_loss, true_label, pred_label=None):
"""
        Add the data of one batch.
        :param y_loss: Anomaly score (data deviation) of the batch.
        :param true_label: Ground-truth labels.
        :param pred_label: Predicted labels.
"""
if pred_label is not None:
if self._total_label is None and self._total_pred_label is None:
self._total_label = true_label
self._total_pred_label = pred_label
else:
self._total_label = torch.cat([self._total_label, true_label], dim=0)
self._total_pred_label = torch.cat([self._total_pred_label, pred_label], dim=0)
return
y_loss = y_loss.view(-1).cpu().detach().numpy()
true_label = true_label.view(-1).cpu().detach().numpy()
if self._total_y_loss is None and self._total_label is None:
self._total_y_loss = y_loss
self._total_label = true_label
return
self._total_y_loss = numpy.concatenate((self._total_y_loss, y_loss), axis=0)
self._total_label = numpy.concatenate((self._total_label, true_label), axis=0)
def best_threshold(self, true_label: list, y_loss: list) -> dict:
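        # Coarse-to-fine threshold search: for each evaluation metric, run 5 rounds of an 11-point grid
        # between the current bounds, keep the candidate with the best score, and narrow the interval
        # around it for the next round.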
ret = {}
for func_name in self.evaluations:
threshold_max = max(y_loss)
threshold_min = 0
best_threshold = 0
for _ in range(5):
threshold_list = [threshold_max - i * (threshold_max - threshold_min) / 10 for i in range(11)]
f1_list = []
for threshold_one in threshold_list:
prediction_loss = numpy.where(numpy.array(y_loss) > threshold_one, self.attack, self.normal)
f1 = eval(f"evaluation.{func_name}.evaluate")(y_true=true_label, y_pred=prediction_loss.tolist())
f1_list.append(f1)
ind = f1_list.index(max(f1_list))
best_threshold = threshold_list[ind]
if ind == 0:
threshold_max = threshold_list[ind]
threshold_min = threshold_list[ind+1]
elif ind == len(threshold_list)-1:
threshold_max = threshold_list[ind-1]
threshold_min = threshold_list[ind]
else:
threshold_max = threshold_list[ind-1]
threshold_min = threshold_list[ind+1]
ret[func_name] = best_threshold
return ret
def auto_threshold(self):
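        # Zero out NaN scores, normalise the scores to [0, 1], search a per-metric threshold, and convert
        # the continuous scores into 0/1 predictions for each evaluation method (skipped when the model
        # already returned labels).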
if self._total_pred_label is not None:
return
self._total_y_loss[numpy.isnan(self._total_y_loss)] = 0
self._total_y_loss = self._total_y_loss / max(self._total_y_loss)
thresholds = self.best_threshold(
self._total_label.reshape(-1).data.tolist(), self._total_y_loss.reshape(-1).data.tolist())
self.true_pred_dict = {
'true': self._total_label.squeeze().tolist()
}
for func_name in thresholds:
self.true_pred_dict[func_name] = \
numpy.where(self._total_y_loss > thresholds[func_name], self.attack, self.normal).squeeze().tolist()
# self.true_pred_df = pandas.DataFrame(self.true_pred_dict)
for func_name in self.true_pred_dict:
if func_name == "true":
continue
self.scores[func_name] = self.get_score(func_name)
def get_score(self, func_name):
if self._total_pred_label is not None:
return eval(f"evaluation.{func_name}.evaluate")(self._total_label.reshape(-1).tolist(),
self._total_pred_label.reshape(-1).tolist())
return eval(f"evaluation.{func_name}.evaluate")(self._total_label.reshape(-1).tolist(),
self.true_pred_dict[f"{func_name}"])
def __str__(self):
res = ""
for func_name in self.scores:
res += f"{func_name}={self.scores[func_name]:.3f} "
return res[:-1]
def train_model(epoch: int, optimizer: torch.optim, dataloader: tud.DataLoader, model: nn.Module,
device: str = "cpu") -> (nn.Module, str):
"""
    Train the model for one epoch.
    :param epoch: Current epoch number.
    :param optimizer: Optimizer.
    :param dataloader: Training DataLoader.
    :param model: The model.
    :param device: Training device, cpu or gpu.
    :return: The trained model and the message to log after training.
"""
model.train()
avg_loss = []
dataloader.dataset.mode = "train"
start_time = time.time()
with tqdm(total=len(dataloader), ncols=150) as _tqdm:
        _tqdm.set_description(f'(progress bar is not written to the local log) epoch:{epoch}, training progress')
for data in dataloader:
x = data[0].to(device)
y_true = data[2].to(device)
optimizer.zero_grad()
loss = model.loss(x=x, y_true=y_true, epoch=epoch, device=device)
avg_loss.append(loss)
optimizer.step()
_tqdm.set_postfix(loss='{:.6f}'.format(sum(avg_loss) / len(avg_loss)))
_tqdm.update(1)
end_time = time.time()
info = f"epoch={epoch}, average loss={'{:.6f}'.format(sum(avg_loss) / len(avg_loss))}, " \
f"train time={'{:.1f}'.format(end_time-start_time)}s"
return model, info
def test_model(dataloader: tud.DataLoader, model: nn.Module, evaluations: [str], device: str = "cpu") -> \
(EvaluationScore, str):
"""
    Test the model.
    :param dataloader: Test DataLoader.
    :param model: The model.
    :param evaluations: Names of the evaluation methods to use.
    :param device: Device, cpu or gpu.
    :return: Evaluation scores and the message to log after testing.
"""
es = EvaluationScore(evaluations)
model.eval()
dataloader.dataset.mode = "test"
start_time = time.time()
with tqdm(total=len(dataloader), ncols=150) as _tqdm:
        _tqdm.set_description('(progress bar is not written to the local log) testing progress')
with torch.no_grad():
for data in dataloader:
x = data[0].to(device)
y_true = data[2].to(device)
label_true = data[1].int().to(device)
y_loss, label_pred = model.detection(x=x, y_true=y_true, device=device)
if label_pred is not None:
es.add_data(y_loss=None, true_label=label_true, pred_label=label_pred)
else:
es.add_data(y_loss=y_loss, true_label=label_true, pred_label=None)
_tqdm.update(1)
end_time = time.time()
es.auto_threshold()
es_score = es.__str__().replace(" ", ", ")
info = f"{es_score}, test time={'{:.1f}'.format(end_time-start_time)}s"
return es, info
def train_and_test_model(start_time: str, epochs: int, normal_dataloader: tud.DataLoader, attack_dataloader: tud.DataLoader,
model: nn.Module, evaluations: [str], device: str = "cpu", lr: float = 1e-4,
model_path: str = None, train: bool = True) -> (dict, dict):
"""
    Train and test a model on one dataset.
    :param start_time: Start time of the experiment, used to locate the output directory.
    :param epochs: Total number of training epochs.
    :param normal_dataloader: Training DataLoader.
    :param attack_dataloader: Test DataLoader.
    :param model: The model.
    :param evaluations: Evaluation methods.
    :param device: Device.
    :param lr: Learning rate.
    :param model_path: Path to a saved model checkpoint.
    :param train: Whether to train; if False, only testing is performed.
    :return: Best score of each evaluation method, and the predicted labels at the best score of each method.
"""
dataset_name = normal_dataloader.dataset.name
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
if model_path:
try:
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            logger.info(f"Model checkpoint {model_path} loaded successfully")
        except Exception:
            logger.warning(f"Failed to load model checkpoint {model_path}; a new model will be trained")
    logger.info(f"Model: {model.name}, dataset: {dataset_name}, device: {device}, starting")
best_score = {}
best_detection = {}
if train:
        logger.info("Mode: train and test")
for epoch in range(1, epochs+1):
model, train_info = train_model(epoch=epoch, optimizer=optimizer, dataloader=normal_dataloader, model=model,
device=device)
es, test_info = test_model(dataloader=attack_dataloader, model=model, evaluations=evaluations,
device=device)
logger.info(f"{train_info}, {test_info}")
es_score = es.__str__().replace(" ", "_")
torch.save({
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict()
}, f'./records/{start_time}/model/model={model.name}_dataset={dataset_name}_epoch={epoch}_{es_score}.pth')
for func_name in es.scores:
if func_name not in best_score or es.scores[func_name] > best_score[func_name]:
best_score[func_name] = es.scores[func_name]
best_detection[func_name] = es.true_pred_dict[func_name]
best_detection["true"] = es.true_pred_dict["true"]
else:
        logger.info("Mode: test only")
es, test_info = test_model(dataloader=attack_dataloader, model=model, evaluations=evaluations, device=device)
logger.info(test_info)
for func_name in es.scores:
best_score[func_name] = es.scores[func_name]
best_detection[func_name] = es.true_pred_dict[func_name]
best_detection["true"] = es.true_pred_dict["true"]
return best_score, best_detection