Initial commit of local code

zyq
2023-05-25 15:30:02 +08:00
parent 0ead9d742b
commit 2236622f33
27 changed files with 453475 additions and 0 deletions

50
config.ini Normal file
View File

@@ -0,0 +1,50 @@
[Dataset]
# Dataset name(s) (folder names); to train and test on multiple datasets, separate them with commas
# Make sure the datasets are placed under ./dataset in the required format
name=SWAT
[Method]
# Model name(s) (file names, without ".py"); to train and test multiple algorithms, separate them with commas
# Make sure the model files are placed under ./model in the required format
# Model names must consist of letters only
name=AnomalyTransformer
[Preprocess]
# Preprocessing method name (file name, without ".py")
# Make sure the preprocessing files are placed under ./preprocess in the required format
name=standardization
[Evaluation]
# Evaluation method name(s) (file names, without ".py"); to use multiple evaluation methods, separate them with commas
# Make sure the evaluation files are placed under ./evaluation in the required format
name=f1,f1pa,ftad,affiliation
[ModelPath]
# To load previously saved model parameters, use "ModelName_DatasetName" as the option name
# Paths are relative to the TSAD folder, or absolute
MtadGatAtt_SWAT=none
[BaseParameters]
# train: whether to train the model; true or false (when false, the model is only tested)
train=true
# epochs: number of training epochs; a positive integer
epochs=20
# batch_size: size of each batch; a positive integer
batch_size=32
# learning_rate: learning rate; values above 0.001 are not recommended
learning_rate=1e-4
# device: device to use; one of auto, cpu, cuda. auto picks a GPU if available, otherwise the CPU; when choosing a GPU, specify its index, e.g. cuda:0
device=auto
[CustomParameters]
# Any number of custom parameters can be added here for special inputs, e.g. external parameters required by a model file or parameters for building the dataset
# input_size: length of the input window sliced from the data
input_size=100
# output_size: length of the output
output_size=1
# step: stride of the sliding window
step=1
# Whether to skip the first row of the dataset CSV (skip it if the first row holds column names)
del_column_name=true
# Whether the first column of the dataset CSV is a timestamp
time_index=true
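For illustration, a minimal sketch of how these options can be read with configparser; it mirrors what main.py below does, and the section and option names come from this file:

import configparser

cf = configparser.ConfigParser()
cf.optionxform = str  # keep option names case-sensitive, as main.py's Config subclass does
cf.read("./config.ini", encoding="utf8")

datasets = [name.strip() for name in cf.get("Dataset", "name").split(",")]
train = cf.getboolean("BaseParameters", "train")
batch_size = cf.getint("BaseParameters", "batch_size")
learning_rate = cf.getfloat("BaseParameters", "learning_rate")
input_size = int(cf.get("CustomParameters", "input_size"))
print(datasets, train, batch_size, learning_rate, input_size)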

250
daemon.py Normal file
View File

@@ -0,0 +1,250 @@
'''
***
Modified generic daemon class
***
Author: http://www.jejik.com/articles/2007/02/
a_simple_unix_linux_daemon_in_python/www.boxedice.com
License: http://creativecommons.org/licenses/by-sa/3.0/
Changes: 23rd Jan 2009 (David Mytton <david@boxedice.com>)
- Replaced hard coded '/dev/null' in __init__ with os.devnull
- Added OS check to conditionally remove code that doesn't
work on OS X
- Added output to console on completion
- Tidied up formatting
11th Mar 2009 (David Mytton <david@boxedice.com>)
- Fixed problem with daemon exiting on Python 2.4
(before SystemExit was part of the Exception base)
13th Aug 2010 (David Mytton <david@boxedice.com>
- Fixed unhandled exception if PID file is empty
'''
# Core modules
from __future__ import print_function
import atexit
import errno
import os
import sys
import time
import signal
class Daemon(object):
"""
A generic daemon class.
Usage: subclass the Daemon class and override the run() method
"""
def __init__(self, pidfile, stdin=os.devnull,
stdout=os.devnull, stderr=os.devnull,
home_dir='.', umask=0o22, verbose=1,
use_gevent=False, use_eventlet=False):
self.stdin = stdin
self.stdout = stdout
self.stderr = stderr
self.pidfile = pidfile
self.home_dir = home_dir
self.verbose = verbose
self.umask = umask
self.daemon_alive = True
self.use_gevent = use_gevent
self.use_eventlet = use_eventlet
def log(self, *args):
if self.verbose >= 1:
print(*args)
def daemonize(self):
"""
Do the UNIX double-fork magic, see Stevens' "Advanced
Programming in the UNIX Environment" for details (ISBN 0201563177)
http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
"""
if self.use_eventlet:
import eventlet.tpool
eventlet.tpool.killall()
try:
pid = os.fork()
if pid > 0:
# Exit first parent
sys.exit(0)
except OSError as e:
sys.stderr.write(
"fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
sys.exit(1)
# Decouple from parent environment
os.chdir(self.home_dir)
os.setsid()
os.umask(self.umask)
# Do second fork
try:
pid = os.fork()
if pid > 0:
# Exit from second parent
sys.exit(0)
except OSError as e:
sys.stderr.write(
"fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
sys.exit(1)
if sys.platform != 'darwin': # This block breaks on OS X
# Redirect standard file descriptors
sys.stdout.flush()
sys.stderr.flush()
si = open(self.stdin, 'r')
so = open(self.stdout, 'a+')
if self.stderr:
try:
se = open(self.stderr, 'a+', 0)
except ValueError:
# Python 3 can't have unbuffered text I/O
se = open(self.stderr, 'a+', 1)
else:
se = so
os.dup2(si.fileno(), sys.stdin.fileno())
os.dup2(so.fileno(), sys.stdout.fileno())
os.dup2(se.fileno(), sys.stderr.fileno())
def sigtermhandler(signum, frame):
self.daemon_alive = False
sys.exit()
if self.use_gevent:
import gevent
gevent.reinit()
gevent.signal(signal.SIGTERM, sigtermhandler, signal.SIGTERM, None)
gevent.signal(signal.SIGINT, sigtermhandler, signal.SIGINT, None)
else:
signal.signal(signal.SIGTERM, sigtermhandler)
signal.signal(signal.SIGINT, sigtermhandler)
self.log("Started")
# Write pidfile
atexit.register(
self.delpid) # Make sure pid file is removed if we quit
pid = str(os.getpid())
open(self.pidfile, 'w+').write("%s\n" % pid)
def delpid(self):
try:
# the process may fork itself again
pid = int(open(self.pidfile, 'r').read().strip())
if pid == os.getpid():
os.remove(self.pidfile)
except OSError as e:
if e.errno == errno.ENOENT:
pass
else:
raise
def start(self, *args, **kwargs):
"""
Start the daemon
"""
self.log("Starting...")
# Check for a pidfile to see if the daemon already runs
try:
pf = open(self.pidfile, 'r')
pid = int(pf.read().strip())
pf.close()
except IOError:
pid = None
except SystemExit:
pid = None
if pid:
message = "pidfile %s already exists. Is it already running?\n"
sys.stderr.write(message % self.pidfile)
sys.exit(1)
# Start the daemon
self.daemonize()
self.run(*args, **kwargs)
def stop(self):
"""
Stop the daemon
"""
if self.verbose >= 1:
self.log("Stopping...")
# Get the pid from the pidfile
pid = self.get_pid()
if not pid:
message = "pidfile %s does not exist. Not running?\n"
sys.stderr.write(message % self.pidfile)
# Just to be sure. A ValueError might occur if the PID file is
# empty but does actually exist
if os.path.exists(self.pidfile):
os.remove(self.pidfile)
sys.exit(0) # Not an error in a restart
# Try killing the daemon process
try:
i = 0
while 1:
os.kill(pid, signal.SIGTERM)
time.sleep(0.1)
i = i + 1
if i % 10 == 0:
os.kill(pid, signal.SIGHUP)
except OSError as err:
if err.errno == errno.ESRCH:
if os.path.exists(self.pidfile):
os.remove(self.pidfile)
else:
print(str(err))
sys.exit(1)
self.log("Stopped")
def restart(self):
"""
Restart the daemon
"""
self.stop()
self.start()
def get_pid(self):
try:
pf = open(self.pidfile, 'r')
pid = int(pf.read().strip())
pf.close()
except IOError:
pid = None
except SystemExit:
pid = None
return pid
def is_running(self):
pid = self.get_pid()
if pid is None:
self.log('Process is stopped')
return False
elif os.path.exists('/proc/%d' % pid):
self.log('Process (pid %d) is running...' % pid)
return True
else:
self.log('Process (pid %d) is killed' % pid)
return False
def run(self):
"""
You should override this method when you subclass Daemon.
It will be called after the process has been
daemonized by start() or restart().
"""
raise NotImplementedError
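A minimal usage sketch for this class (the pidfile path and the loop body are illustrative only): subclass Daemon, override run(), and control the process from separate invocations.

from daemon import Daemon
import time

class MyDaemon(Daemon):
    def run(self):
        # placeholder work loop; replace with real logic
        while self.daemon_alive:
            time.sleep(1)

if __name__ == "__main__":
    d = MyDaemon("/tmp/mydaemon.pid")  # hypothetical pidfile path
    d.start()  # daemonize, write the pidfile and call run()
    # a later invocation would call d.stop() to terminate the daemon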

4
evaluation/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from .ftad import evaluate
from .f1pa import evaluate
from .affiliation import evaluate
from .f1 import evaluate

23
evaluation/affiliation.py Normal file
View File

@@ -0,0 +1,23 @@
from .affiliation_bin.generics import convert_vector_to_events
from .affiliation_bin.metrics import pr_from_events
def evaluate(y_true: list, y_pred: list) -> float:
"""
Affiliation-based evaluation: convert the binary label vectors to events and compute the affiliation F1
:param y_true: ground truth labels
:param y_pred: detected labels
:return: affiliation F1 score
"""
true, pred = y_true.copy(), y_pred.copy()
events_pred = convert_vector_to_events(pred)
events_gt = convert_vector_to_events(true)
Trange = (0, len(pred))
res = pr_from_events(events_pred, events_gt, Trange)
aff_precision = res["precision"]
aff_recall = res["recall"]
if aff_recall == 0 or aff_precision == 0:
return 0
aff_f1 = 2 * aff_precision * aff_recall / (aff_precision + aff_recall)
return aff_f1
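For illustration, a hedged usage sketch with made-up label vectors (assuming this module is imported as evaluation.affiliation):

from evaluation.affiliation import evaluate

y_true = [0, 0, 1, 1, 1, 0, 0, 0, 1, 1]
y_pred = [0, 0, 0, 1, 1, 1, 0, 0, 1, 0]
print(evaluate(y_true, y_pred))  # affiliation F1 in [0, 1]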

View File

@@ -0,0 +1 @@
from .metrics import pr_from_events

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ._integral_interval import interval_intersection
def t_start(j, Js=[(1, 2), (3, 4), (5, 6)], Trange=(1, 10)):
"""
Helper for `E_gt_func`
:param j: index from 0 to len(Js) (included) on which to get the start
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included
:return: generalized start such that the middle of t_start and t_stop
always gives the affiliation_bin zone
"""
b = max(Trange)
n = len(Js)
if j == n:
return (2 * b - t_stop(n - 1, Js, Trange))
else:
return (Js[j][0])
def t_stop(j, Js=[(1, 2), (3, 4), (5, 6)], Trange=(1, 10)):
"""
Helper for `E_gt_func`
:param j: index from 0 to len(Js) (included) on which to get the stop
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included
:return: generalized stop such that the middle of t_start and t_stop
always gives the affiliation_bin zone
"""
if j == -1:
a = min(Trange)
return (2 * a - t_start(0, Js, Trange))
else:
return (Js[j][1])
def E_gt_func(j, Js, Trange):
"""
Get the affiliation_bin zone of element j of the ground truth
:param j: index from 0 to len(Js) (excluded) on which to get the zone
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included, can
be (-math.inf, math.inf) for distance measures
:return: affiliation_bin zone of element j of the ground truth represented
as a couple
"""
range_left = (t_stop(j - 1, Js, Trange) + t_start(j, Js, Trange)) / 2
range_right = (t_stop(j, Js, Trange) + t_start(j + 1, Js, Trange)) / 2
return ((range_left, range_right))
def get_all_E_gt_func(Js, Trange):
"""
Get the affiliation_bin partition from the ground truth point of view
:param Js: ground truth events, as a list of couples
:param Trange: range of the series where Js is included, can
be (-math.inf, math.inf) for distance measures
:return: affiliation_bin partition of the events
"""
# E_gt is the limit of affiliation_bin/attraction for each ground truth event
E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
return (E_gt)
def affiliation_partition(Is=[(1, 1.5), (2, 5), (5, 6), (8, 9)], E_gt=[(1, 2.5), (2.5, 4.5), (4.5, 10)]):
"""
Cut the events into the affiliation_bin zones
The presentation given here is from the ground truth point of view,
but it is also used in the reversed direction in the main function.
:param Is: events as a list of couples
:param E_gt: range of the affiliation_bin zones
:return: a list of list of intervals (each interval represented by either
a couple or None for empty interval). The outer list is indexed by each
affiliation_bin zone of `E_gt`. The inner list is indexed by the events of `Is`.
"""
out = [None] * len(E_gt)
for j in range(len(E_gt)):
E_gt_j = E_gt[j]
discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E
discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E
kept_index = [not (a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)]
Is_j = [x for x, y in zip(Is, kept_index) if y]  # keep only the events overlapping this zone
out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j]
return (out)
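A small worked sketch of the two helpers above (toy numbers; assumes both functions are in scope):

Js = [(3, 5), (8, 10)]            # ground truth events
Trange = (0, 20)
E_gt = get_all_E_gt_func(Js, Trange)
print(E_gt)                       # [(0.0, 6.5), (6.5, 20.0)]: one affiliation zone per ground truth event
Is = [(1, 2), (4, 9)]             # predicted events
print(affiliation_partition(Is, E_gt))  # predicted events cut into the two zones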

View File

@@ -0,0 +1,493 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import math
from .generics import _sum_wo_nan
"""
In order to shorten the length of the variables,
the general convention in this file is to let:
- I for a predicted event (start, stop),
- Is for a list of predicted events,
- J for a ground truth event,
- Js for a list of ground truth events.
"""
def interval_length(J=(1, 2)):
"""
Length of an interval
:param J: couple representating the start and stop of an interval, or None
:return: length of the interval, and 0 for a None interval
"""
if J is None:
return (0)
return (J[1] - J[0])
def sum_interval_lengths(Is=[(1, 2), (3, 4), (5, 6)]):
"""
Sum of length of the intervals
:param Is: list of intervals represented by starts and stops
:return: sum of the interval length
"""
return (sum([interval_length(I) for I in Is]))
def interval_intersection(I=(1, 3), J=(2, 4)):
"""
Intersection between two intervals I and J
I and J should be either empty or represent a positive interval (no point)
:param I: an interval represented by start and stop
:param J: a second interval of the same form
:return: an interval representing the start and stop of the intersection (or None if empty)
"""
if I is None:
return (None)
if J is None:
return (None)
I_inter_J = (max(I[0], J[0]), min(I[1], J[1]))
if I_inter_J[0] >= I_inter_J[1]:
return (None)
else:
return (I_inter_J)
def interval_subset(I=(1, 3), J=(0, 6)):
"""
Checks whether I is a subset of J
:param I: an non empty interval represented by start and stop
:param J: a second non empty interval of the same form
:return: True if I is a subset of J
"""
if (I[0] >= J[0]) and (I[1] <= J[1]):
return True
else:
return False
def cut_into_three_func(I, J):
"""
Cut an interval I into a partition of 3 subsets:
the elements before J,
the elements belonging to J,
and the elements after J
:param I: an interval represented by start and stop, or None for an empty one
:param J: a non empty interval
:return: a triplet of three intervals, each represented by either (start, stop) or None
"""
if I is None:
return ((None, None, None))
I_inter_J = interval_intersection(I, J)
if I == I_inter_J:
I_before = None
I_after = None
elif I[1] <= J[0]:
I_before = I
I_after = None
elif I[0] >= J[1]:
I_before = None
I_after = I
elif (I[0] <= J[0]) and (I[1] >= J[1]):
I_before = (I[0], I_inter_J[0])
I_after = (I_inter_J[1], I[1])
elif I[0] <= J[0]:
I_before = (I[0], I_inter_J[0])
I_after = None
elif I[1] >= J[1]:
I_before = None
I_after = (I_inter_J[1], I[1])
else:
raise ValueError('unexpected unconsidered case')
return (I_before, I_inter_J, I_after)
def get_pivot_j(I, J):
"""
Get the single point of J that is the closest to I, called 'pivot' here,
with the requirement that I should be outside J
:param I: a non empty interval (start, stop)
:param J: another non empty interval, with empty intersection with I
:return: the element j of J that is the closest to I
"""
if interval_intersection(I, J) is not None:
raise ValueError('I and J should have a void intersection')
j_pivot = None # j_pivot is a border of J
if max(I) <= min(J):
j_pivot = min(J)
elif min(I) >= max(J):
j_pivot = max(J)
else:
raise ValueError('I should be outside J')
return (j_pivot)
def integral_mini_interval(I, J):
"""
In the specific case where interval I is located outside J,
integral of distance from x to J over the interval x \in I.
This is the *integral* i.e. the sum.
It's not the mean (not divided by the length of I yet)
:param I: a interval (start, stop), or None
:param J: a non empty interval, with empty intersection with I
:return: the integral of distances d(x, J) over x \in I
"""
if I is None:
return (0)
j_pivot = get_pivot_j(I, J)
a = min(I)
b = max(I)
return ((b - a) * abs((j_pivot - (a + b) / 2)))
def integral_interval_distance(I, J):
"""
For any non empty intervals I, J, compute the
integral of distance from x to J over the interval x \in I.
This is the *integral* i.e. the sum.
It's not the mean (not divided by the length of I yet)
The interval I can intersect J or not
:param I: a interval (start, stop), or None
:param J: a non empty interval
:return: the integral of distances d(x, J) over x \in I
"""
# I and J are single intervals (not generic sets)
# I is a predicted interval in the range of affiliation_bin of J
def f(I_cut):
return (integral_mini_interval(I_cut, J))
# If I_middle is fully included into J, it is
# the distance to J is always 0
def f0(I_middle):
return (0)
cut_into_three = cut_into_three_func(I, J)
# Distance for now, not the mean:
# Distance left: Between cut_into_three[0] and the point min(J)
d_left = f(cut_into_three[0])
# Distance middle: Between cut_into_three[1] = I inter J, and J
d_middle = f0(cut_into_three[1])
# Distance right: Between cut_into_three[2] and the point max(J)
d_right = f(cut_into_three[2])
# It's an integral so summable
return (d_left + d_middle + d_right)
def integral_mini_interval_P_CDFmethod__min_piece(I, J, E):
"""
Helper of `integral_mini_interval_Pprecision_CDFmethod`
In the specific case where interval I is located outside J,
compute the integral $\int_{d_min}^{d_max} \min(m, x) dx$, with:
- m the smallest distance from J to E,
- d_min the smallest distance d(x, J) from x \in I to J
- d_max the largest distance d(x, J) from x \in I to J
:param I: a single predicted interval, a non empty interval (start, stop)
:param J: ground truth interval, a non empty interval, with empty intersection with I
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
:return: the integral $\int_{d_min}^{d_max} \min(m, x) dx$
"""
if interval_intersection(I, J) is not None:
raise ValueError('I and J should have a void intersection')
if not interval_subset(J, E):
raise ValueError('J should be included in E')
if not interval_subset(I, E):
raise ValueError('I should be included in E')
e_min = min(E)
j_min = min(J)
j_max = max(J)
e_max = max(E)
i_min = min(I)
i_max = max(I)
d_min = max(i_min - j_max, j_min - i_max)
d_max = max(i_max - j_max, j_min - i_min)
m = min(j_min - e_min, e_max - j_max)
A = min(d_max, m) ** 2 - min(d_min, m) ** 2
B = max(d_max, m) - max(d_min, m)
C = (1 / 2) * A + m * B
return (C)
def integral_mini_interval_Pprecision_CDFmethod(I, J, E):
"""
Integral of the probability of distances over the interval I.
In the specific case where interval I is located outside J,
compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single predicted interval, a non empty interval (start, stop)
:param J: ground truth interval, a non empty interval, with empty intersection with I
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
:return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
"""
integral_min_piece = integral_mini_interval_P_CDFmethod__min_piece(I, J, E)
e_min = min(E)
j_min = min(J)
j_max = max(J)
e_max = max(E)
i_min = min(I)
i_max = max(I)
d_min = max(i_min - j_max, j_min - i_max)
d_max = max(i_max - j_max, j_min - i_min)
integral_linear_piece = (1 / 2) * (d_max ** 2 - d_min ** 2)
integral_remaining_piece = (j_max - j_min) * (i_max - i_min)
DeltaI = i_max - i_min
DeltaE = e_max - e_min
output = DeltaI - (1 / DeltaE) * (integral_min_piece + integral_linear_piece + integral_remaining_piece)
return (output)
def integral_interval_probaCDF_precision(I, J, E):
"""
Integral of the probability of distances over the interval I.
Compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single (non empty) predicted interval in the zone of affiliation_bin of J
:param J: ground truth interval
:param E: affiliation_bin/influence zone for J
:return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$
"""
# I and J are single intervals (not generic sets)
def f(I_cut):
if I_cut is None:
return (0)
else:
return (integral_mini_interval_Pprecision_CDFmethod(I_cut, J, E))
# If I_middle is fully included into J, it is
# integral of 1 on the interval I_middle, so it's |I_middle|
def f0(I_middle):
if I_middle is None:
return (0)
else:
return (max(I_middle) - min(I_middle))
cut_into_three = cut_into_three_func(I, J)
# Distance for now, not the mean:
# Distance left: Between cut_into_three[0] and the point min(J)
d_left = f(cut_into_three[0])
# Distance middle: Between cut_into_three[1] = I inter J, and J
d_middle = f0(cut_into_three[1])
# Distance right: Between cut_into_three[2] and the point max(J)
d_right = f(cut_into_three[2])
# It's an integral so summable
return (d_left + d_middle + d_right)
def cut_J_based_on_mean_func(J, e_mean):
"""
Helper function for the recall.
Partition J into two intervals: before and after e_mean
(e_mean represents the center element of E the zone of affiliation_bin)
:param J: ground truth interval
:param e_mean: a float number (center value of E)
:return: a couple partitionning J into (J_before, J_after)
"""
if J is None:
J_before = None
J_after = None
elif e_mean >= max(J):
J_before = J
J_after = None
elif e_mean <= min(J):
J_before = None
J_after = J
else: # e_mean is across J
J_before = (min(J), e_mean)
J_after = (e_mean, max(J))
return ((J_before, J_after))
def integral_mini_interval_Precall_CDFmethod(I, J, E):
"""
Integral of the probability of distances over the interval J.
In the specific case where interval J is located outside I,
compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single (non empty) predicted interval
:param J: ground truth (non empty) interval, with empty intersection with I
:param E: the affiliation_bin/influence zone for J, represented as a couple (start, stop)
:return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
"""
# The interval J should be located outside I
# (so it's either the left piece or the right piece w.r.t I)
i_pivot = get_pivot_j(J, I)
e_min = min(E)
e_max = max(E)
e_mean = (e_min + e_max) / 2
# If i_pivot is outside E (it's possible), then
# the distance is worst that any random element within E,
# so we set the recall to 0
if i_pivot <= min(E):
return (0)
elif i_pivot >= max(E):
return (0)
# Otherwise, we have at least i_pivot in E and so d < M so min(d,M)=d
cut_J_based_on_e_mean = cut_J_based_on_mean_func(J, e_mean)
J_before = cut_J_based_on_e_mean[0]
J_after = cut_J_based_on_e_mean[1]
iemin_mean = (e_min + i_pivot) / 2
cut_Jbefore_based_on_iemin_mean = cut_J_based_on_mean_func(J_before, iemin_mean)
J_before_closeE = cut_Jbefore_based_on_iemin_mean[
0] # before e_mean and closer to e_min than i_pivot ~ J_before_before
J_before_closeI = cut_Jbefore_based_on_iemin_mean[
1] # before e_mean and closer to i_pivot than e_min ~ J_before_after
iemax_mean = (e_max + i_pivot) / 2
cut_Jafter_based_on_iemax_mean = cut_J_based_on_mean_func(J_after, iemax_mean)
J_after_closeI = cut_Jafter_based_on_iemax_mean[0] # after e_mean and closer to i_pivot than e_max ~ J_after_before
J_after_closeE = cut_Jafter_based_on_iemax_mean[1] # after e_mean and closer to e_max than i_pivot ~ J_after_after
if J_before_closeE is not None:
j_before_before_min = min(J_before_closeE) # == min(J)
j_before_before_max = max(J_before_closeE)
else:
j_before_before_min = math.nan
j_before_before_max = math.nan
if J_before_closeI is not None:
j_before_after_min = min(J_before_closeI) # == j_before_before_max if existing
j_before_after_max = max(J_before_closeI) # == max(J_before)
else:
j_before_after_min = math.nan
j_before_after_max = math.nan
if J_after_closeI is not None:
j_after_before_min = min(J_after_closeI) # == min(J_after)
j_after_before_max = max(J_after_closeI)
else:
j_after_before_min = math.nan
j_after_before_max = math.nan
if J_after_closeE is not None:
j_after_after_min = min(J_after_closeE) # == j_after_before_max if existing
j_after_after_max = max(J_after_closeE) # == max(J)
else:
j_after_after_min = math.nan
j_after_after_max = math.nan
# <-- J_before_closeE --> <-- J_before_closeI --> <-- J_after_closeI --> <-- J_after_closeE -->
# j_bb_min j_bb_max j_ba_min j_ba_max j_ab_min j_ab_max j_aa_min j_aa_max
# (with `b` for before and `a` for after in the previous variable names)
# vs e_mean m = min(t-e_min, e_max-t) d=|i_pivot-t| min(d,m) \int min(d,m)dt \int d dt \int_(min(d,m)+d)dt \int_{t \in J}(min(d,m)+d)dt
# Case J_before_closeE & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = t-e_min t^2/2-e_min*t i_pivot*t-t^2/2 t^2/2-e_min*t+i_pivot*t-t^2/2 = (i_pivot-e_min)*t (i_pivot-e_min)*tB - (i_pivot-e_min)*tA = (i_pivot-e_min)*(tB-tA)
# Case J_before_closeI & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
# Case J_after_closeI & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2)
# Case J_after_closeE & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = e_max-t e_max*t-t^2/2 i_pivot*t-t^2/2 e_max*t-t^2/2+i_pivot*t-t^2/2 = (e_max+i_pivot)*t-t^2 (e_max+i_pivot)*tB-tB^2 - (e_max+i_pivot)*tA + tA^2 = (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2)
#
# Case J_before_closeE & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-e_min t^2/2-e_min*t t^2/2-i_pivot*t t^2/2-e_min*t+t^2/2-i_pivot*t = t^2-(e_min+i_pivot)*t tB^2-(e_min+i_pivot)*tB - tA^2 + (e_min+i_pivot)*tA = (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA)
# Case J_before_closeI & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
# Case J_after_closeI & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA)
# Case J_after_closeE & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = e_max-t e_max*t-t^2/2 t^2/2-i_pivot*t e_max*t-t^2/2+t^2/2-i_pivot*t = (e_max-i_pivot)*t (e_max-i_pivot)*tB - (e_max-i_pivot)*tA = (e_max-i_pivot)*(tB-tA)
if i_pivot >= max(J):
part1_before_closeE = (i_pivot - e_min) * (
j_before_before_max - j_before_before_min) # (i_pivot-e_min)*(tB-tA) # j_before_before_max - j_before_before_min
part2_before_closeI = 2 * i_pivot * (j_before_after_max - j_before_after_min) - (
j_before_after_max ** 2 - j_before_after_min ** 2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_before_after_max - j_before_after_min
part3_after_closeI = 2 * i_pivot * (j_after_before_max - j_after_before_min) - (
j_after_before_max ** 2 - j_after_before_min ** 2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_after_before_max - j_after_before_min
part4_after_closeE = (e_max + i_pivot) * (j_after_after_max - j_after_after_min) - (
j_after_after_max ** 2 - j_after_after_min ** 2) # (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # j_after_after_max - j_after_after_min
out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
elif i_pivot <= min(J):
part1_before_closeE = (j_before_before_max ** 2 - j_before_before_min ** 2) - (e_min + i_pivot) * (
j_before_before_max - j_before_before_min) # (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # j_before_before_max - j_before_before_min
part2_before_closeI = (j_before_after_max ** 2 - j_before_after_min ** 2) - 2 * i_pivot * (
j_before_after_max - j_before_after_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_before_after_max - j_before_after_min
part3_after_closeI = (j_after_before_max ** 2 - j_after_before_min ** 2) - 2 * i_pivot * (
j_after_before_max - j_after_before_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_after_before_max - j_after_before_min
part4_after_closeE = (e_max - i_pivot) * (
j_after_after_max - j_after_after_min) # (e_max-i_pivot)*(tB-tA) # j_after_after_max - j_after_after_min
out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE]
else:
raise ValueError('The i_pivot should be outside J')
out_integral_min_dm_plus_d = _sum_wo_nan(out_parts) # integral on all J, i.e. sum of the disjoint parts
# We have for each point t of J:
# \bar{F}_{t, recall}(d) = 1 - (1/|E|) * (min(d,m) + d)
# Since t is a single-point here, and we are in the case where i_pivot is inside E.
# The integral is then given by:
# C = \int_{t \in J} \bar{F}_{t, recall}(D(t)) dt
# = \int_{t \in J} 1 - (1/|E|) * (min(d,m) + d) dt
# = |J| - (1/|E|) * [\int_{t \in J} (min(d,m) + d) dt]
# = |J| - (1/|E|) * out_integral_min_dm_plus_d
DeltaJ = max(J) - min(J)
DeltaE = max(E) - min(E)
C = DeltaJ - (1 / DeltaE) * out_integral_min_dm_plus_d
return (C)
def integral_interval_probaCDF_recall(I, J, E):
"""
Integral of the probability of distances over the interval J.
Compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$.
This is the *integral* i.e. the sum (not the mean)
:param I: a single (non empty) predicted interval
:param J: ground truth (non empty) interval
:param E: the affiliation_bin/influence zone for J
:return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$
"""
# I and J are single intervals (not generic sets)
# E is the outside affiliation_bin interval of J (even for recall!)
# (in particular J \subset E)
#
# J is the portion of the ground truth affiliated to I
# I is a predicted interval (can be outside E possibly since it's recall)
def f(J_cut):
if J_cut is None:
return (0)
else:
return integral_mini_interval_Precall_CDFmethod(I, J_cut, E)
# If J_middle is fully included into I, it is
# integral of 1 on the interval J_middle, so it's |J_middle|
def f0(J_middle):
if J_middle is None:
return (0)
else:
return (max(J_middle) - min(J_middle))
cut_into_three = cut_into_three_func(J, I) # it's J that we cut into 3, depending on the position w.r.t I
# since we integrate over J this time.
#
# Distance for now, not the mean:
# Distance left: Between cut_into_three[0] and the point min(I)
d_left = f(cut_into_three[0])
# Distance middle: Between cut_into_three[1] = J inter I, and I
d_middle = f0(cut_into_three[1])
# Distance right: Between cut_into_three[2] and the point max(I)
d_right = f(cut_into_three[2])
# It's an integral so summable
return (d_left + d_middle + d_right)
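A short sketch of the basic interval helpers defined above (toy values; assumes the functions are in scope):

print(interval_intersection((1, 3), (2, 4)))          # (2, 3)
print(interval_intersection((1, 2), (3, 4)))          # None: disjoint intervals
print(cut_into_three_func((0, 10), (3, 5)))           # ((0, 3), (3, 5), (5, 10))
print(integral_interval_distance((1, 2), (2, 5.5)))   # 0.5 for this configuration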

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import math
from ._affiliation_zone import (
get_all_E_gt_func,
affiliation_partition)
from ._integral_interval import (
integral_interval_distance,
integral_interval_probaCDF_precision,
integral_interval_probaCDF_recall,
interval_length,
sum_interval_lengths)
def affiliation_precision_distance(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5)):
"""
Compute the individual average distance from Is to a single ground truth J
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:return: individual average precision directed distance number
"""
if all([I is None for I in Is]): # no prediction in the current area
return (math.nan) # undefined
return (sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is))
def affiliation_precision_proba(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5), E=(0, 8)):
"""
Compute the individual precision probability from Is to a single ground truth J
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:param E: couple representing the start and stop of the zone of affiliation_bin of J
:return: individual precision probability in [0, 1], or math.nan if undefined
"""
if all([I is None for I in Is]): # no prediction in the current area
return (math.nan) # undefined
return (sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is))
def affiliation_recall_distance(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5)):
"""
Compute the individual average distance from a single J to the predictions Is
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:return: individual average recall directed distance number
"""
Is = [I for I in Is if I is not None] # filter possible None in Is
if len(Is) == 0: # there is no prediction in the current area
return (math.inf)
E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions
Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is
return (sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J))
def affiliation_recall_proba(Is=[(1, 2), (3, 4), (5, 6)], J=(2, 5.5), E=(0, 8)):
"""
Compute the individual recall probability from a single ground truth J to Is
:param Is: list of predicted events within the affiliation_bin zone of J
:param J: couple representating the start and stop of a ground truth interval
:param E: couple representing the start and stop of the zone of affiliation_bin of J
:return: individual recall probability in [0, 1]
"""
Is = [I for I in Is if I is not None] # filter possible None in Is
if len(Is) == 0: # there is no prediction in the current area
return (0)
E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions
Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is
return (sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J))
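As a quick sanity check (toy events; assumes the functions above are in scope):

Is = [(1, 2), (3, 4)]
J = (2, 5.5)
print(affiliation_precision_distance(Is, J))   # should be 0.25 for these inputs
print(affiliation_recall_distance(Is, J))      # average recall-directed distance from J to the predictions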

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from itertools import groupby
from operator import itemgetter
import math
import gzip
import glob
import os
def convert_vector_to_events(vector=[0, 1, 1, 0, 0, 1, 0]):
"""
Convert a binary vector (indicating 1 for the anomalous instances)
to a list of events. The events are considered as durations,
i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).
:param vector: a list of elements belonging to {0, 1}
:return: a list of couples, each couple representing the start and stop of
each event
"""
positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
events = []
for k, g in groupby(enumerate(positive_indexes), lambda ix: ix[0] - ix[1]):
cur_cut = list(map(itemgetter(1), g))
events.append((cur_cut[0], cur_cut[-1]))
# Consistent conversion in case of range anomalies (for indexes):
# A positive index i is considered as the interval [i, i+1),
# so the last index should be moved by 1
events = [(x, y + 1) for (x, y) in events]
return (events)
def infer_Trange(events_pred, events_gt):
"""
Given the list of events events_pred and events_gt, get the
smallest possible Trange corresponding to the start and stop indexes
of the whole series.
Trange will not influence the measure of distances, but will impact the
measures of probabilities.
:param events_pred: a list of couples corresponding to predicted events
:param events_gt: a list of couples corresponding to ground truth events
:return: a couple corresponding to the smallest range containing the events
"""
if len(events_gt) == 0:
raise ValueError('The gt events should contain at least one event')
if len(events_pred) == 0:
# empty prediction, base Trange only on events_gt (which is non empty)
return (infer_Trange(events_gt, events_gt))
min_pred = min([x[0] for x in events_pred])
min_gt = min([x[0] for x in events_gt])
max_pred = max([x[1] for x in events_pred])
max_gt = max([x[1] for x in events_gt])
Trange = (min(min_pred, min_gt), max(max_pred, max_gt))
return (Trange)
def has_point_anomalies(events):
"""
Checking whether events contain point anomalies, i.e.
events starting and stopping at the same time.
:param events: a list of couples corresponding to predicted events
:return: True if the events have any point anomalies, False otherwise
"""
if len(events) == 0:
return (False)
return (min([x[1] - x[0] for x in events]) == 0)
def _sum_wo_nan(vec):
"""
Sum of elements, ignoring math.isnan ones
:param vec: vector of floating numbers
:return: sum of the elements, ignoring math.isnan ones
"""
vec_wo_nan = [e for e in vec if not math.isnan(e)]
return (sum(vec_wo_nan))
def _len_wo_nan(vec):
"""
Count of elements, ignoring math.isnan ones
:param vec: vector of floating numbers
:return: count of the elements, ignoring math.isnan ones
"""
vec_wo_nan = [e for e in vec if not math.isnan(e)]
return (len(vec_wo_nan))
def read_gz_data(filename='data/machinetemp_groundtruth.gz'):
"""
Load a file compressed with gz, such that each line of the
file is either 0 (representing a normal instance) or 1 (representing
an anomalous instance).
:param filename: file path to the gz compressed file
:return: list of integers with either 0 or 1
"""
with gzip.open(filename, 'rb') as f:
content = f.read().splitlines()
content = [int(x) for x in content]
return (content)
def read_all_as_events():
"""
Load the files contained in the folder `data/` and convert
to events. The length of the series is kept.
The convention for the file name is: `dataset_algorithm.gz`
:return: two dictionaries:
- the first containing the list of events for each dataset and algorithm,
- the second containing the range of the series for each dataset
"""
filepaths = glob.glob('data/*.gz')
datasets = dict()
Tranges = dict()
for filepath in filepaths:
vector = read_gz_data(filepath)
events = convert_vector_to_events(vector)
# ad hoc cut for those files
cut_filepath = (os.path.split(filepath)[1]).split('_')
data_name = cut_filepath[0]
algo_name = (cut_filepath[1]).split('.')[0]
if not data_name in datasets:
datasets[data_name] = dict()
Tranges[data_name] = (0, len(vector))
datasets[data_name][algo_name] = events
return (datasets, Tranges)
def f1_func(p, r):
"""
Compute the f1 function
:param p: precision numeric value
:param r: recall numeric value
:return: f1 numeric value
"""
return (2 * p * r / (p + r))
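A quick sketch of the vector-to-events conversion used throughout this package (import path as suggested by the imports in evaluation/affiliation.py):

from evaluation.affiliation_bin.generics import (
    convert_vector_to_events, infer_Trange, has_point_anomalies)

print(convert_vector_to_events([0, 1, 1, 0, 0, 1, 0]))  # [(1, 3), (5, 6)]
print(infer_Trange([(1, 3)], [(2, 6)]))                 # (1, 6)
print(has_point_anomalies([(1, 3), (4, 4)]))            # True: (4, 4) is a point anomaly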

View File

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from .generics import (
infer_Trange,
has_point_anomalies,
_len_wo_nan,
_sum_wo_nan,
read_all_as_events)
from ._affiliation_zone import (
get_all_E_gt_func,
affiliation_partition)
from ._single_ground_truth_event import (
affiliation_precision_distance,
affiliation_recall_distance,
affiliation_precision_proba,
affiliation_recall_proba)
def test_events(events):
"""
Verify the validity of the input events
:param events: list of events, each represented by a couple (start, stop)
:return: None. Raise an error for incorrect formed or non ordered events
"""
if type(events) is not list:
raise TypeError('Input `events` should be a list of couples')
if not all([type(x) is tuple for x in events]):
raise TypeError('Input `events` should be a list of tuples')
if not all([len(x) == 2 for x in events]):
raise ValueError('Input `events` should be a list of couples (start, stop)')
if not all([x[0] <= x[1] for x in events]):
raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop')
if not all([events[i][1] < events[i + 1][0] for i in range(len(events) - 1)]):
raise ValueError('Couples of input `events` should be disjoint and ordered')
def pr_from_events(events_pred, events_gt, Trange):
"""
Compute the affiliation_bin metrics including the precision/recall in [0,1],
along with the individual precision/recall distances and probabilities
:param events_pred: list of predicted events, each represented by a couple
indicating the start and the stop of the event
:param events_gt: list of ground truth events, each represented by a couple
indicating the start and the stop of the event
:param Trange: range of the series where events_pred and events_gt are included,
represented as a couple (start, stop)
:return: dictionary with precision, recall, and the individual metrics
"""
# testing the inputs
test_events(events_pred)
test_events(events_gt)
# other tests
minimal_Trange = infer_Trange(events_pred, events_gt)
if not Trange[0] <= minimal_Trange[0]:
raise ValueError('`Trange` should include all the events')
if not minimal_Trange[1] <= Trange[1]:
raise ValueError('`Trange` should include all the events')
if len(events_gt) == 0:
raise ValueError('Input `events_gt` should have at least one event')
if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
raise ValueError('Cannot manage point anomalies currently')
if Trange is None:
# Set as default, but Trange should be indicated if probabilities are used
raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function)')
E_gt = get_all_E_gt_func(events_gt, Trange)
aff_partition = affiliation_partition(events_pred, E_gt)
# Computing precision distance
d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
# Computing recall distance
d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
# Computing precision
p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
# Computing recall
p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
if _len_wo_nan(p_precision) > 0:
p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision)
else:
p_precision_average = p_precision[0] # math.nan
p_recall_average = sum(p_recall) / len(p_recall)
dict_out = dict({'precision': p_precision_average,
'recall': p_recall_average,
'individual_precision_probabilities': p_precision,
'individual_recall_probabilities': p_recall,
'individual_precision_distances': d_precision,
'individual_recall_distances': d_recall})
return (dict_out)
def produce_all_results():
"""
Produce the affiliation_bin precision/recall for all files
contained in the `data` repository
:return: a dictionary indexed by data names, each containing a dictionary
indexed by algorithm names, each containing the results of the affiliation_bin
metrics (precision, recall, individual probabilities and distances)
"""
datasets, Tranges = read_all_as_events() # read all the events in folder `data`
results = dict()
for data_name in datasets.keys():
results_data = dict()
for algo_name in datasets[data_name].keys():
if algo_name != 'groundtruth':
results_data[algo_name] = pr_from_events(datasets[data_name][algo_name],
datasets[data_name]['groundtruth'],
Tranges[data_name])
results[data_name] = results_data
return (results)
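For illustration, a hedged sketch of calling pr_from_events directly with toy events (import path as suggested by evaluation/affiliation.py):

from evaluation.affiliation_bin.metrics import pr_from_events

events_pred = [(3, 6), (8, 9)]
events_gt = [(2, 5), (8, 10)]
res = pr_from_events(events_pred, events_gt, Trange=(0, 12))
print(res["precision"], res["recall"])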

12
evaluation/f1.py Normal file
View File

@@ -0,0 +1,12 @@
from sklearn.metrics import f1_score, precision_score, recall_score
def evaluate(y_true: list, y_pred: list) -> float:
"""
F1 evaluation
:param y_true: ground truth labels
:param y_pred: detected labels
:return: f1 score
"""
f1 = f1_score(y_true, y_pred)
return f1
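A quick sketch with toy labels:

from evaluation.f1 import evaluate

print(evaluate([0, 1, 1, 0], [0, 1, 0, 0]))  # ≈ 0.667: one TP, one FN, no FP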

66
evaluation/f1pa.py Normal file
View File

@@ -0,0 +1,66 @@
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
def adjust_predicts(score, label,
threshold=None,
pred=None,
calc_latency=False):
"""
This function is taken from the official point-adjust implementation.
Calculate adjusted predict labels using given `score`, `threshold` (or given `pred`) and `label`.
Args:
score =: The anomaly score
label : The ground-truth label
threshold (float): The threshold of anomaly score.
A point is labeled as "anomaly" if its score is lower than the threshold.
pred : if not None, adjust `pred` and ignore `score` and `threshold`,
calc_latency (bool):
Returns:
np.ndarray: predict labels
"""
if len(score) != len(label):
raise ValueError("score and label must have the same length")
score = np.asarray(score)
label = np.asarray(label)
latency = 0
if pred is None:
predict = score < threshold
else:
predict = pred
actual = label > 0.1
anomaly_state = False
anomaly_count = 0
for i in range(len(score)):
if actual[i] and predict[i] and not anomaly_state:
anomaly_state = True
anomaly_count += 1
for j in range(i, 0, -1):
if not actual[j]:
break
else:
if not predict[j]:
predict[j] = True
latency += 1
elif not actual[i]:
anomaly_state = False
if anomaly_state:
predict[i] = True
if calc_latency:
return predict, latency / (anomaly_count + 1e-4)
else:
return predict
def evaluate(y_true: list, y_pred: list) -> float:
"""
F1-PA evaluation: adjust the labels with point adjust, then compute F1
:param y_true: ground truth labels
:param y_pred: detected labels
:return: f1 score after point adjustment
"""
y_true, y_pred = y_true.copy(), y_pred.copy()
adjust_y_pred = adjust_predicts(score=np.array([0] * len(y_true)), label=y_true, pred=y_pred)
f1 = f1_score(y_true, adjust_y_pred)
return f1
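A small sketch showing the effect of point adjustment (made-up labels): flagging a single point of an anomalous segment is enough for the whole segment to count as detected.

from evaluation.f1pa import evaluate

y_true = [0, 1, 1, 1, 0]
y_pred = [0, 0, 1, 0, 0]   # only one point of the anomalous segment is flagged
print(evaluate(y_true, y_pred))  # 1.0 after point adjust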

156
evaluation/ftad.py Normal file
View File

@@ -0,0 +1,156 @@
import math
import numpy as np
def evaluate(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
"""
Compute the F score based on anomalous segments
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: segment F score
"""
p_tad = precision_tad(y_true=y_true, y_pred=y_pred, pos_label=pos_label, max_segment=max_segment)
r_tad = recall_tad(y_true=y_true, y_pred=y_pred, pos_label=pos_label, max_segment=max_segment)
score = 0
if p_tad and r_tad:
score = 2 * p_tad * r_tad / (p_tad + r_tad)
return score
def recall_tad(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
"""
Compute recall based on anomalous segments
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: segment recall
"""
if max_segment == 0:
max_segment = get_max_segment(y_true=y_true, pos_label=pos_label)
score = tp_count(y_true, y_pred, pos_label=pos_label, max_segment=max_segment)
return score
def precision_tad(y_true: [int], y_pred: [int], pos_label: int = 1, max_segment: int = 0) -> float:
"""
Compute precision based on anomalous segments
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: segment precision
"""
if max_segment == 0:
max_segment = get_max_segment(y_true=y_true, pos_label=pos_label)
score = tp_count(y_pred, y_true, pos_label=pos_label, max_segment=max_segment)
return score
def tp_count(y_true: [int], y_pred: [int], max_segment: int = 0, pos_label: int = 1) -> float:
"""
Compute the segment score; swapping y_true and y_pred yields recall and precision, respectively.
:param y_true: ground truth labels
:param y_pred: detected labels
:param pos_label: target value to detect, i.e. which value marks an anomaly
:param max_segment: maximum length of an anomalous segment
:return: score
"""
if len(y_true) != len(y_pred):
raise ValueError("y_true and y_pred should have the same length.")
neg_label = 1 - pos_label
position = 0
tp_list = []
if max_segment == 0:
raise ValueError("max segment length is 0")
while position < len(y_true):
if y_true[position] == neg_label:
position += 1
continue
elif y_true[position] == pos_label:
start = position
while position < len(y_true) and y_true[position] == pos_label and position - start < max_segment:
position += 1
end = position
true_window = [weight_line(i/(end-start)) for i in range(end-start)]
true_window = softmax(true_window)
pred_window = np.array(y_pred[start:end])
pred_window = np.where(pred_window == pos_label, true_window, 0)
tp_list.append(sum(pred_window))
else:
raise ValueError("label value must be 0 or 1")
score = sum(tp_list) / len(tp_list) if len(tp_list) > 0 else 0
return score
def weight_line(position: float) -> float:
"""
Assign a value to each point according to the weight curve
:param position: relative position of the point within the segment, in [0, 1]
:return: weight value
"""
if position < 0 or position > 1:
raise ValueError(f"point position in segment need between 0 and 1, {position} is error position")
sigma = 1 / (1 + math.exp(10*(position-0.5)))
return sigma
def softmax(x: [float]) -> [float]:
"""
Softmax function
:param x: data of one anomalous segment
:return: the segment after softmax
"""
ret = np.exp(x)/np.sum(np.exp(x), axis=0)
return ret.tolist()
def get_max_segment(y_true: [int], pos_label: int = 1) -> int:
"""
Get the length of the longest anomalous segment
:param y_true: ground truth labels
:param pos_label: value of the anomaly label
:return: maximum length
"""
max_num, i = 0, 0
neg_label = 1 - pos_label
while i < len(y_true):
if y_true[i] == neg_label:
i += 1
continue
elif y_true[i] == pos_label:
start = i
while i < len(y_true) and y_true[i] == pos_label:
i += 1
end = i
max_num = max(max_num, end-start)
else:
raise ValueError("label value must be 0 or 1")
return max_num
if __name__ == "__main__":
# y_true = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
# 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# y_pred = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
# 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
import pandas as pd
data = pd.read_csv("../records/2023-04-10_10-30-27/detection_result/MtadGatAtt_SWAT.csv")
y_true = data["true"].tolist()
y_pred = data["ftad"].tolist()
print(evaluate(y_true, y_pred))
# print(precision_tad(y_true, y_pred))
# print(recall_tad(y_true, y_pred))
# from sklearn.metrics import f1_score, precision_score, recall_score
# print(f1_score(y_true, y_pred))
# print(precision_score(y_true, y_pred))
# print(recall_score(y_true, y_pred))
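Independent of the CSV used in the __main__ block above, a hedged sketch with toy labels:

from evaluation.ftad import evaluate, get_max_segment

y_true = [0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0]
y_pred = [0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1]
print(get_max_segment(y_true))   # 4: length of the longest anomalous segment
print(evaluate(y_true, y_pred))  # segment-based F score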

10
evaluation/template.py Normal file
View File

@@ -0,0 +1,10 @@
def evaluate(y_true: [int], y_pred: [int]) -> float:
"""
Evaluation method
:param y_true: ground truth labels
:param y_pred: detected labels
:return: score, as a float
"""
return 0.0

169
main.py Normal file
View File

@@ -0,0 +1,169 @@
# -*- coding:utf-8 -*-
import configparser
from daemon import Daemon
import sys
from torch.cuda import is_available
from loguru import logger
from datetime import datetime
from utils import create_all_dataloader, train_and_test_model
import method
import gc
import traceback
import pandas
import os
class Config(configparser.ConfigParser):
def __init__(self, defaults=None):
configparser.ConfigParser.__init__(self, defaults=defaults)
def optionxform(self, optionstr):
return optionstr
def as_dict(self):
d = dict(self._sections)
for k in d:
d[k] = dict(d[k])
return d
class Main(Daemon):
def __init__(self, pidfile):
super(Main, self).__init__(pidfile=pidfile)
current = datetime.now()
self.start_time = current.strftime("%Y-%m-%d_%H-%M-%S")
if len(sys.argv) == 1 or sys.argv[1] == "start":
self.run()
elif sys.argv[1] == "stop":
self.stop()
elif sys.argv[1] == "daemon":
self.start()
else:
print("Input format error. Please input: python3 main.py start|stop|daemon")
sys.exit(0)
def run(self):
# Read parameters from the config file
cf = Config()
cf.read("./config.ini", encoding='utf8')
cf_dict = cf.as_dict()
# Read dataset names
dataset_names = cf.get("Dataset", "name")
datasets = dataset_names.split(",")
datasets = [name.strip() for name in datasets]
# Read model names
model_names = cf.get("Method", "name")
models = model_names.split(",")
models = [name.strip() for name in models]
# Read the preprocessing method name
preprocess_name = cf.get("Preprocess", "name")
# Read evaluation method names
evaluation_names = cf.get("Evaluation", "name")
evaluations = evaluation_names.split(",")
evaluations = [name.strip() for name in evaluations]
# Read paths of saved model parameters
model_path = cf_dict["ModelPath"]
# Read training parameters
train = cf.getboolean("BaseParameters", "train")
epochs = cf.getint("BaseParameters", "epochs")
batch_size = cf.getint("BaseParameters", "batch_size")
learning_rate = cf.getfloat("BaseParameters", "learning_rate")
device = cf.get("BaseParameters", "device")
if device == "auto":
device = 'cuda:0' if is_available() else 'cpu'
# Read custom parameters
customs = cf_dict["CustomParameters"]
# Create the record directories for this experiment
os.makedirs(f"./records", exist_ok=True)
os.makedirs(f"./records/{self.start_time}", exist_ok=True)
os.makedirs(f"./records/{self.start_time}/detection_result", exist_ok=True)
os.makedirs(f"./records/{self.start_time}/model", exist_ok=True)
# Initialize logging
logger.add(f"./records/{self.start_time}/log",
level='DEBUG',
format='{time:YYYY-MM-DD HH:mm:ss} - {level} - {file} - {line} - {message}',
rotation="100 MB")
# Core routine
self.core(models, datasets, preprocess_name, evaluations, model_path, train, epochs, batch_size, learning_rate,
device, customs)
logger.info(f"实验结束,关闭进程")
def core(self, models: [str], datasets: [str], preprocess_name: str, evaluations: [str], model_path: {}, train: bool,
epochs: int, batch_size: int, learning_rate: float, device: str, customs: {}):
"""
Initialize the datasets and models, then start training and testing
:param models: names of the models to train; may contain several
:param datasets: names of the datasets to use; may contain several
:param preprocess_name: name of the preprocessing method
:param evaluations: names of the evaluation methods; may contain several
:param model_path: paths of model parameters to load; may contain several
:param train: whether to train; if False, the models are only tested
:param epochs: total number of training epochs
:param batch_size: batch size
:param learning_rate: learning rate
:param device: device
:param customs: custom parameters
"""
logger.info(f"加载数据集")
try:
# Initialize all datasets
all_dataloader = create_all_dataloader(datasets=datasets, input_size=int(customs["input_size"]),
output_size=int(customs["output_size"]), step=int(customs["step"]),
batch_size=batch_size, time_index=customs["time_index"] == "true",
del_column_name=customs["del_column_name"] == "true",
preprocess_name=preprocess_name)
except RuntimeError:
logger.error(traceback.format_exc())
return
# Start training and testing
for model_name in models:
try:
logger.info(f"------------华丽丽的分界线:{model_name} 实验开始------------")
for i in range(len(all_dataloader)):
dataloader = all_dataloader[i]
all_score = {}
for sub_dataloader in dataloader:
dataset_name = sub_dataloader["dataset_name"]
normal_dataloader = sub_dataloader["normal"]
attack_dataloader = sub_dataloader["attack"]
logger.info(f"初始化模型 {model_name}")
model = eval(f"method.{model_name}.Model")(customs=customs, dataloader=normal_dataloader)
model = model.to(device)
logger.info(f"模型初始化完成")
pth_name = model_path[f"{model_name}_{dataset_name}"] if f"{model_name}_{dataset_name}" \
in model_path else None
best_score, best_detection = train_and_test_model(start_time=self.start_time, epochs=epochs,
normal_dataloader=normal_dataloader,
attack_dataloader=attack_dataloader,
model=model, evaluations=evaluations,
device=device, lr=learning_rate,
model_path=pth_name, train=train)
# Save the labels of the best detection result as a CSV file
best_detection = pandas.DataFrame(best_detection)
best_detection.to_csv(f"./records/{self.start_time}/detection_result/{model_name}_{dataset_name}.csv", index=False)
for evaluation_name in evaluations:
if evaluation_name not in all_score:
all_score[evaluation_name] = []
all_score[evaluation_name].append(best_score[evaluation_name])
gc.collect()
logger.info(f"------------------------")
logger.info(f"{model_name} / {datasets[i]} 实验完毕")
for evaluation_name in all_score:
logger.info(f"{evaluation_name}: {'{:.3f}'.format(sum(all_score[evaluation_name]) / len(all_score[evaluation_name]))}")
logger.info(f"------------------------")
logger.info(f"------------华丽丽的分界线:{model_name} 实验结束------------")
except RuntimeError:
logger.error(traceback.format_exc())
return
if __name__ == '__main__':
pidpath = "/tmp/command_detection.pid"
app = Main(pidpath)

View File

@@ -0,0 +1,305 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
from math import sqrt
import torch.utils.data as tud
class PositionalEmbedding(nn.Module):
def __init__(self, d_model, max_len=5000):
super(PositionalEmbedding, self).__init__()
# Compute the positional encodings once in log space.
pe = torch.zeros(max_len, d_model).float()
pe.requires_grad = False
position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0)
self.register_buffer('pe', pe)
def forward(self, x):
return self.pe[:, :x.size(1)]
class TokenEmbedding(nn.Module):
def __init__(self, c_in, d_model):
super(TokenEmbedding, self).__init__()
padding = 1 if torch.__version__ >= '1.5.0' else 2
self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
kernel_size=3, padding=padding, padding_mode='circular', bias=False)
for m in self.modules():
if isinstance(m, nn.Conv1d):
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
def forward(self, x):
x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
return x
class DataEmbedding(nn.Module):
def __init__(self, c_in, d_model, dropout=0.0):
super(DataEmbedding, self).__init__()
self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
self.dropout = nn.Dropout(p=dropout)
def forward(self, x):
x = self.value_embedding(x) + self.position_embedding(x)
return self.dropout(x)
class TriangularCausalMask():
def __init__(self, B, L, device="cpu"):
mask_shape = [B, 1, L, L]
with torch.no_grad():
self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
@property
def mask(self):
return self._mask
class AnomalyAttention(nn.Module):
def __init__(self, win_size, mask_flag=True, scale=None, attention_dropout=0.0, output_attention=False):
super(AnomalyAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
window_size = win_size
self.distances = torch.zeros((window_size, window_size)).cuda()
for i in range(window_size):
for j in range(window_size):
self.distances[i][j] = abs(i - j)
def forward(self, queries, keys, values, sigma, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
scale = self.scale or 1. / sqrt(E)
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
if attn_mask is None:
attn_mask = TriangularCausalMask(B, L, device=queries.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
attn = scale * scores
sigma = sigma.transpose(1, 2) # B L H -> B H L
window_size = attn.shape[-1]
sigma = torch.sigmoid(sigma * 5) + 1e-5
sigma = torch.pow(3, sigma) - 1
sigma = sigma.unsqueeze(-1).repeat(1, 1, 1, window_size) # B H L L
prior = self.distances.unsqueeze(0).unsqueeze(0).repeat(sigma.shape[0], sigma.shape[1], 1, 1).cuda()
prior = 1.0 / (math.sqrt(2 * math.pi) * sigma) * torch.exp(-prior ** 2 / 2 / (sigma ** 2))
series = self.dropout(torch.softmax(attn, dim=-1))
V = torch.einsum("bhls,bshd->blhd", series, values)
if self.output_attention:
return (V.contiguous(), series, prior, sigma)
else:
return (V.contiguous(), None)
class AttentionLayer(nn.Module):
def __init__(self, attention, d_model, n_heads, d_keys=None,
d_values=None):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (d_model // n_heads)
d_values = d_values or (d_model // n_heads)
self.norm = nn.LayerNorm(d_model)
self.inner_attention = attention
self.query_projection = nn.Linear(d_model,
d_keys * n_heads)
self.key_projection = nn.Linear(d_model,
d_keys * n_heads)
self.value_projection = nn.Linear(d_model,
d_values * n_heads)
self.sigma_projection = nn.Linear(d_model,
n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
x = queries
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
sigma = self.sigma_projection(x).view(B, L, H)
out, series, prior, sigma = self.inner_attention(
queries,
keys,
values,
sigma,
attn_mask
)
out = out.view(B, L, -1)
return self.out_projection(out), series, prior, sigma
class EncoderLayer(nn.Module):
def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
super(EncoderLayer, self).__init__()
d_ff = d_ff or 4 * d_model
self.attention = attention
self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.dropout = nn.Dropout(dropout)
self.activation = F.relu if activation == "relu" else F.gelu
def forward(self, x, attn_mask=None):
new_x, attn, mask, sigma = self.attention(
x, x, x,
attn_mask=attn_mask
)
x = x + self.dropout(new_x)
y = x = self.norm1(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
y = self.dropout(self.conv2(y).transpose(-1, 1))
return self.norm2(x + y), attn, mask, sigma
class Encoder(nn.Module):
def __init__(self, attn_layers, norm_layer=None):
super(Encoder, self).__init__()
self.attn_layers = nn.ModuleList(attn_layers)
self.norm = norm_layer
def forward(self, x, attn_mask=None):
# x [B, L, D]
series_list = []
prior_list = []
sigma_list = []
for attn_layer in self.attn_layers:
x, series, prior, sigma = attn_layer(x, attn_mask=attn_mask)
series_list.append(series)
prior_list.append(prior)
sigma_list.append(sigma)
if self.norm is not None:
x = self.norm(x)
return x, series_list, prior_list, sigma_list
class Model(nn.Module):
    def __init__(self, customs: dict, dataloader: tud.DataLoader):
super(Model, self).__init__()
win_size = int(customs["input_size"])
enc_in = c_out = dataloader.dataset.train_inputs.shape[-1]
d_model = 512
n_heads = 8
e_layers = 3
d_ff = 512
dropout = 0.0
activation = 'gelu'
output_attention = True
self.k = 3
self.win_size = win_size
self.name = "AnomalyTransformer"
# Encoding
self.embedding = DataEmbedding(enc_in, d_model, dropout)
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
AttentionLayer(
AnomalyAttention(win_size, False, attention_dropout=dropout, output_attention=output_attention),
d_model, n_heads),
d_model,
d_ff,
dropout=dropout,
activation=activation
) for l in range(e_layers)
],
norm_layer=torch.nn.LayerNorm(d_model)
)
self.projection = nn.Linear(d_model, c_out, bias=True)
def forward(self, x):
enc_out = self.embedding(x)
enc_out, series, prior, sigmas = self.encoder(enc_out)
enc_out = self.projection(enc_out)
return enc_out, series, prior, sigmas
@staticmethod
def my_kl_loss(p, q):
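        # Element-wise KL divergence KL(p || q) with 1e-4 smoothing, summed over the last (key)
        # dimension and averaged over the attention heads, giving one value per time position for each sample.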
res = p * (torch.log(p + 0.0001) - torch.log(q + 0.0001))
return torch.mean(torch.sum(res, dim=-1), dim=1)
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
output, series, prior, _ = self.forward(x)
series_loss = 0.0
prior_loss = 0.0
for u in range(len(prior)):
series_loss += (torch.mean(self.my_kl_loss(series[u], (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)).detach())) +
torch.mean(self.my_kl_loss((prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)).detach(), series[u])))
prior_loss += (torch.mean(self.my_kl_loss((prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)), series[u].detach())) +
torch.mean(self.my_kl_loss(series[u].detach(), (prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1, self.win_size)))))
series_loss = series_loss / len(prior)
prior_loss = prior_loss / len(prior)
rec_loss = nn.MSELoss()(output, x)
loss1 = rec_loss - self.k * series_loss
loss2 = rec_loss + self.k * prior_loss
# Minimax strategy
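        # loss1 (prior detached) pushes the series association away from the prior, while loss2
        # (series detached) pulls the prior towards the series; both keep the reconstruction term.
        # The two backward passes accumulate gradients for the single optimizer.step() in the training loop.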
loss1.backward(retain_graph=True)
loss2.backward()
return loss1.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
temperature = 50
output, series, prior, _ = self.forward(x)
        loss = torch.mean(nn.MSELoss(reduction='none')(x, output), dim=-1)
series_loss = 0.0
prior_loss = 0.0
for u in range(len(prior)):
if u == 0:
series_loss = self.my_kl_loss(series[u], (
prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)).detach()) * temperature
prior_loss = self.my_kl_loss(
(prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)),
series[u].detach()) * temperature
else:
series_loss += self.my_kl_loss(series[u], (
prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)).detach()) * temperature
prior_loss += self.my_kl_loss(
(prior[u] / torch.unsqueeze(torch.sum(prior[u], dim=-1), dim=-1).repeat(1, 1, 1,
self.win_size)),
series[u].detach()) * temperature
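        # Association-based anomaly score: the softmax of the negative, temperature-scaled
        # association discrepancy weights the per-point reconstruction error, and the result is
        # averaged over the window to give one score per sample.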
metric = torch.softmax((-series_loss - prior_loss), dim=-1)
cri = metric * loss
cri = cri.mean(dim=-1)
return cri, None

414
method/MtadGat.py Normal file
View File

@@ -0,0 +1,414 @@
"""
Third-party implementation of MTAD-GAT found on GitHub.
The original code used torch.empty, which frequently caused the loss to become NaN, so it was replaced with torch.zeros.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as tud
class ConvLayer(nn.Module):
"""1-D Convolution layer to extract high-level features of each time-series input
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param kernel_size: size of kernel to use in the convolution operation
"""
def __init__(self, n_features, kernel_size=7):
super(ConvLayer, self).__init__()
self.padding = nn.ConstantPad1d((kernel_size - 1) // 2, 0.0)
self.conv = nn.Conv1d(in_channels=n_features, out_channels=n_features, kernel_size=kernel_size)
self.relu = nn.ReLU()
def forward(self, x):
x = x.permute(0, 2, 1)
x = self.padding(x)
x = self.relu(self.conv(x))
return x.permute(0, 2, 1) # Permute back
class FeatureAttentionLayer(nn.Module):
"""Single Graph Feature/Spatial Attention Layer
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True):
super(FeatureAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.embed_dim = embed_dim if embed_dim is not None else window_size
self.use_gatv2 = use_gatv2
self.num_nodes = n_features
self.use_bias = use_bias
# Because linear transformation is done after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * window_size
a_input_dim = self.embed_dim
else:
lin_input_dim = window_size
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
# self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
self.a = nn.Parameter(torch.zeros((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
# self.bias = nn.Parameter(torch.empty(n_features, n_features))
self.bias = nn.Parameter(torch.zeros(n_features, n_features))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For feature attention we represent a node as the values of a particular feature across all timestamps
x = x.permute(0, 2, 1)
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, k, k, 2*window_size)
a_input = self.leakyrelu(self.lin(a_input)) # (b, k, k, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, k, k, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, k, k, embed_dim)
a_input = self._make_attention_input(Wx) # (b, k, k, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, k, k, 1)
if self.use_bias:
e += self.bias
# Attention weights
attention = torch.softmax(e, dim=2)
attention = torch.dropout(attention, self.dropout, train=self.training)
# Computing new node features using the attention
h = self.sigmoid(torch.matmul(attention, x))
return h.permute(0, 2, 1)
def _make_attention_input(self, v):
"""Preparing the feature attention mechanism.
Creating matrix with all possible combinations of concatenations of node.
Each node consists of all values of that node within the window
v1 || v1,
...
v1 || vK,
v2 || v1,
...
v2 || vK,
...
...
vK || v1,
...
vK || vK,
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2) # (b, K*K, 2*window_size)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.window_size)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class TemporalAttentionLayer(nn.Module):
"""Single Graph Temporal Attention Layer
:param n_features: number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True):
super(TemporalAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.use_gatv2 = use_gatv2
self.embed_dim = embed_dim if embed_dim is not None else n_features
self.num_nodes = window_size
self.use_bias = use_bias
# Because linear transformation is performed after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * n_features
a_input_dim = self.embed_dim
else:
lin_input_dim = n_features
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
# self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
self.a = nn.Parameter(torch.zeros((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
# self.bias = nn.Parameter(torch.empty(window_size, window_size))
self.bias = nn.Parameter(torch.zeros(window_size, window_size))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For temporal attention a node is represented as all feature values at a specific timestamp
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, n, n, 2*n_features)
a_input = self.leakyrelu(self.lin(a_input)) # (b, n, n, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, n, n, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, n, n, embed_dim)
a_input = self._make_attention_input(Wx) # (b, n, n, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, n, n, 1)
if self.use_bias:
e += self.bias # (b, n, n, 1)
# Attention weights
attention = torch.softmax(e, dim=2)
attention = torch.dropout(attention, self.dropout, train=self.training)
h = self.sigmoid(torch.matmul(attention, x)) # (b, n, k)
return h
def _make_attention_input(self, v):
"""Preparing the temporal attention mechanism.
Creating matrix with all possible combinations of concatenations of node values:
(v1, v2..)_t1 || (v1, v2..)_t1
(v1, v2..)_t1 || (v1, v2..)_t2
...
...
(v1, v2..)_tn || (v1, v2..)_t1
(v1, v2..)_tn || (v1, v2..)_t2
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.n_features)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class GRULayer(nn.Module):
"""Gated Recurrent Unit (GRU) Layer
:param in_dim: number of input features
:param hid_dim: hidden size of the GRU
:param n_layers: number of layers in GRU
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(GRULayer, self).__init__()
self.hid_dim = hid_dim
self.n_layers = n_layers
self.dropout = 0.0 if n_layers == 1 else dropout
self.gru = nn.GRU(in_dim, hid_dim, num_layers=n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
out, h = self.gru(x)
out, h = out[-1, :, :], h[-1, :, :] # Extracting from last layer
return out, h
class RNNDecoder(nn.Module):
"""GRU-based Decoder network that converts latent vector into output
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(RNNDecoder, self).__init__()
self.in_dim = in_dim
self.dropout = 0.0 if n_layers == 1 else dropout
self.rnn = nn.GRU(in_dim, hid_dim, n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
decoder_out, _ = self.rnn(x)
return decoder_out
class ReconstructionModel(nn.Module):
"""Reconstruction Model
:param window_size: length of the input sequence
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param in_dim: number of output features
:param dropout: dropout rate
"""
def __init__(self, window_size, in_dim, hid_dim, out_dim, n_layers, dropout):
super(ReconstructionModel, self).__init__()
self.window_size = window_size
self.decoder = RNNDecoder(in_dim, hid_dim, n_layers, dropout)
self.fc = nn.Linear(hid_dim, out_dim)
def forward(self, x):
# x will be last hidden state of the GRU layer
h_end = x
h_end_rep = h_end.repeat_interleave(self.window_size, dim=1).view(x.size(0), self.window_size, -1)
decoder_out = self.decoder(h_end_rep)
out = self.fc(decoder_out)
return out
class Forecasting_Model(nn.Module):
"""Forecasting model (fully-connected network)
:param in_dim: number of input features
:param hid_dim: hidden size of the FC network
:param out_dim: number of output features
:param n_layers: number of FC layers
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, out_dim, n_layers, dropout):
super(Forecasting_Model, self).__init__()
layers = [nn.Linear(in_dim, hid_dim)]
for _ in range(n_layers - 1):
layers.append(nn.Linear(hid_dim, hid_dim))
layers.append(nn.Linear(hid_dim, out_dim))
self.layers = nn.ModuleList(layers)
self.dropout = nn.Dropout(dropout)
self.relu = nn.ReLU()
def forward(self, x):
for i in range(len(self.layers) - 1):
x = self.relu(self.layers[i](x))
x = self.dropout(x)
return self.layers[-1](x)
class Model(nn.Module):
""" MTAD_GAT model class.
:param n_features: Number of input features
:param window_size: Length of the input sequence
:param out_dim: Number of features to output
:param kernel_size: size of kernel to use in the 1-D convolution
:param feat_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in feat-oriented GAT layer
:param time_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in time-oriented GAT layer
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param gru_n_layers: number of layers in the GRU layer
:param gru_hid_dim: hidden dimension in the GRU layer
:param forecast_n_layers: number of layers in the FC-based Forecasting Model
:param forecast_hid_dim: hidden dimension in the FC-based Forecasting Model
:param recon_n_layers: number of layers in the GRU-based Reconstruction Model
:param recon_hid_dim: hidden dimension in the GRU-based Reconstruction Model
:param dropout: dropout rate
    :param alpha: negative slope used in the leaky ReLU activation function
"""
    def __init__(self, customs: dict, dataloader: tud.DataLoader):
super(Model, self).__init__()
n_features = dataloader.dataset.train_inputs.shape[-1]
window_size = int(customs["input_size"])
out_dim = n_features
kernel_size = 7
feat_gat_embed_dim = None
time_gat_embed_dim = None
use_gatv2 = True
gru_n_layers = 1
gru_hid_dim = 150
forecast_n_layers = 1
forecast_hid_dim = 150
recon_n_layers = 1
recon_hid_dim = 150
dropout = 0.2
alpha = 0.2
self.name = "MtadGat"
self.conv = ConvLayer(n_features, kernel_size)
self.feature_gat = FeatureAttentionLayer(
n_features, window_size, dropout, alpha, feat_gat_embed_dim, use_gatv2)
self.temporal_gat = TemporalAttentionLayer(n_features, window_size, dropout, alpha, time_gat_embed_dim,
use_gatv2)
self.gru = GRULayer(3 * n_features, gru_hid_dim, gru_n_layers, dropout)
self.forecasting_model = Forecasting_Model(
gru_hid_dim, forecast_hid_dim, out_dim, forecast_n_layers, dropout)
self.recon_model = ReconstructionModel(window_size, gru_hid_dim, recon_hid_dim, out_dim, recon_n_layers,
dropout)
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
x = self.conv(x)
h_feat = self.feature_gat(x)
h_temp = self.temporal_gat(x)
h_cat = torch.cat([x, h_feat, h_temp], dim=2) # (b, n, 3k)
_, h_end = self.gru(h_cat)
h_end = h_end.view(x.shape[0], -1) # Hidden state for last timestamp
predictions = self.forecasting_model(h_end)
recons = self.recon_model(h_end)
return predictions, recons
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
if preds.ndim == 3:
preds = preds.squeeze(1)
if y_true.ndim == 3:
y_true = y_true.squeeze(1)
forecast_criterion = nn.MSELoss()
recon_criterion = nn.MSELoss()
forecast_loss = torch.sqrt(forecast_criterion(y_true, preds))
recon_loss = torch.sqrt(recon_criterion(x, recons))
loss = forecast_loss + recon_loss
loss.backward()
return loss.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
score = F.pairwise_distance(recons.reshape(recons.size(0), -1), x.reshape(x.size(0), -1)) + \
F.pairwise_distance(y_true.reshape(y_true.size(0), -1), preds.reshape(preds.size(0), -1))
return score, None

647
method/MtadGatAtt.py Normal file
View File

@@ -0,0 +1,647 @@
import torch
import torch.nn as nn
from math import sqrt
import torch.nn.functional as F
import numpy as np
import torch.utils.data as tud
class ConvLayer(nn.Module):
"""1-D Convolution layer to extract high-level features of each time-series input
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param kernel_size: size of kernel to use in the convolution operation
"""
def __init__(self, n_features, kernel_size=7):
super(ConvLayer, self).__init__()
self.padding = nn.ConstantPad1d((kernel_size - 1) // 2, 0.0)
self.conv = nn.Conv1d(in_channels=n_features, out_channels=n_features, kernel_size=kernel_size)
self.relu = nn.ReLU()
def forward(self, x):
x = x.permute(0, 2, 1)
x = self.padding(x)
x = self.relu(self.conv(x))
return x.permute(0, 2, 1) # Permute back
class FeatureAttentionLayer(nn.Module):
"""Single Graph Feature/Spatial Attention Layer
:param n_features: Number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True,
use_softmax=True):
super(FeatureAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.embed_dim = embed_dim if embed_dim is not None else window_size
self.use_gatv2 = use_gatv2
self.num_nodes = n_features
self.use_bias = use_bias
self.use_softmax = use_softmax
# Because linear transformation is done after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * window_size
a_input_dim = self.embed_dim
else:
lin_input_dim = window_size
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
self.bias = nn.Parameter(torch.ones(n_features, n_features))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For feature attention we represent a node as the values of a particular feature across all timestamps
x = x.permute(0, 2, 1)
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, k, k, 2*window_size)
a_input = self.leakyrelu(self.lin(a_input)) # (b, k, k, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, k, k, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, k, k, embed_dim)
a_input = self._make_attention_input(Wx) # (b, k, k, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, k, k, 1)
if self.use_bias:
e += self.bias
# Attention weights
if self.use_softmax:
e = torch.softmax(e, dim=2)
attention = torch.dropout(e, self.dropout, train=self.training)
# Computing new node features using the attention
h = self.sigmoid(torch.matmul(attention, x))
return h.permute(0, 2, 1)
def _make_attention_input(self, v):
"""Preparing the feature attention mechanism.
Creating matrix with all possible combinations of concatenations of node.
Each node consists of all values of that node within the window
v1 || v1,
...
v1 || vK,
v2 || v1,
...
v2 || vK,
...
...
vK || v1,
...
vK || vK,
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2) # (b, K*K, 2*window_size)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.window_size)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class TemporalAttentionLayer(nn.Module):
"""Single Graph Temporal Attention Layer
:param n_features: number of input features/nodes
:param window_size: length of the input sequence
:param dropout: percentage of nodes to dropout
    :param alpha: negative slope used in the leaky ReLU activation function
:param embed_dim: embedding dimension (output dimension of linear transformation)
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param use_bias: whether to include a bias term in the attention layer
"""
def __init__(self, n_features, window_size, dropout, alpha, embed_dim=None, use_gatv2=True, use_bias=True,
use_softmax=True):
super(TemporalAttentionLayer, self).__init__()
self.n_features = n_features
self.window_size = window_size
self.dropout = dropout
self.use_gatv2 = use_gatv2
self.embed_dim = embed_dim if embed_dim is not None else n_features
self.num_nodes = window_size
self.use_bias = use_bias
self.use_softmax = use_softmax
# Because linear transformation is performed after concatenation in GATv2
if self.use_gatv2:
self.embed_dim *= 2
lin_input_dim = 2 * n_features
a_input_dim = self.embed_dim
else:
lin_input_dim = n_features
a_input_dim = 2 * self.embed_dim
self.lin = nn.Linear(lin_input_dim, self.embed_dim)
self.a = nn.Parameter(torch.empty((a_input_dim, 1)))
nn.init.xavier_uniform_(self.a.data, gain=1.414)
if self.use_bias:
self.bias = nn.Parameter(torch.ones(window_size, window_size))
self.leakyrelu = nn.LeakyReLU(alpha)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
# x shape (b, n, k): b - batch size, n - window size, k - number of features
# For temporal attention a node is represented as all feature values at a specific timestamp
# 'Dynamic' GAT attention
# Proposed by Brody et. al., 2021 (https://arxiv.org/pdf/2105.14491.pdf)
# Linear transformation applied after concatenation and attention layer applied after leakyrelu
if self.use_gatv2:
a_input = self._make_attention_input(x) # (b, n, n, 2*n_features)
a_input = self.leakyrelu(self.lin(a_input)) # (b, n, n, embed_dim)
e = torch.matmul(a_input, self.a).squeeze(3) # (b, n, n, 1)
# Original GAT attention
else:
Wx = self.lin(x) # (b, n, n, embed_dim)
a_input = self._make_attention_input(Wx) # (b, n, n, 2*embed_dim)
e = self.leakyrelu(torch.matmul(a_input, self.a)).squeeze(3) # (b, n, n, 1)
if self.use_bias:
e += self.bias # (b, n, n, 1)
# Attention weights
if self.use_softmax:
e = torch.softmax(e, dim=2)
attention = torch.dropout(e, self.dropout, train=self.training)
h = self.sigmoid(torch.matmul(attention, x)) # (b, n, k)
return h
def _make_attention_input(self, v):
"""Preparing the temporal attention mechanism.
Creating matrix with all possible combinations of concatenations of node values:
(v1, v2..)_t1 || (v1, v2..)_t1
(v1, v2..)_t1 || (v1, v2..)_t2
...
...
(v1, v2..)_tn || (v1, v2..)_t1
(v1, v2..)_tn || (v1, v2..)_t2
"""
K = self.num_nodes
blocks_repeating = v.repeat_interleave(K, dim=1) # Left-side of the matrix
blocks_alternating = v.repeat(1, K, 1) # Right-side of the matrix
combined = torch.cat((blocks_repeating, blocks_alternating), dim=2)
if self.use_gatv2:
return combined.view(v.size(0), K, K, 2 * self.n_features)
else:
return combined.view(v.size(0), K, K, 2 * self.embed_dim)
class FullAttention(nn.Module):
def __init__(self, mask_flag=True, scale=None, attention_dropout=0.1, output_attention=False):
super(FullAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
self.relu_q = nn.ReLU()
self.relu_k = nn.ReLU()
@staticmethod
def TriangularCausalMask(B, L, S, device='cpu'):
mask_shape = [B, 1, L, S]
with torch.no_grad():
mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1)
return mask.to(device)
def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)  # scaling factor; defaults to 1/sqrt(d_k)
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
if attn_mask is None:
attn_mask = self.TriangularCausalMask(B, L, S, device=queries.device)
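            # Note: masked positions are filled with 0 rather than -inf, so after the softmax they still
            # receive non-zero weight (proportional to exp(0)); this differs from the usual causal masking
            # that removes future positions entirely.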
scores.masked_fill_(attn_mask, 0)
A = self.dropout(torch.softmax(scale * scores, dim=-1))
V = torch.einsum("bhls,bshd->blhd", A, values)
# queries = self.relu_q(queries)
# keys = self.relu_k(keys)
# KV = torch.einsum("blhe,bshe->bhls", keys, values)
# A = self.dropout(scale * KV)
# V = torch.einsum("bshd,bhls->blhd", queries, A)
if self.output_attention:
return (V.contiguous(), A)
else:
return (V.contiguous(), None)
class ProbAttention(nn.Module):
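    # Sparse attention variant adapted from Informer's ProbSparse attention, kept here as an
    # alternative to FullAttention (AttentionBlock below instantiates FullAttention by default).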
def __init__(self, mask_flag=True, factor=2, scale=None, attention_dropout=0.1, output_attention=False):
super(ProbAttention, self).__init__()
self.factor = factor
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
@staticmethod
def ProbMask(B, H, D, index, scores, device='cpu'):
_mask = torch.ones(D, scores.shape[-2], dtype=torch.bool).triu(1)
_mask_ex = _mask[None, None, :].expand(B, H, D, scores.shape[-2])
indicator = _mask_ex.transpose(-2, -1)[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :].transpose(-2, -1)
mask = indicator.view(scores.shape)
return mask.to(device)
def _prob_KV(self, K, V, sample_v, n_top): # n_top: c*ln(L_q)
# Q [B, H, L, D]
B, H, L, E_V = V.shape
_, _, _, E_K = K.shape
# calculate the sampled K_V
V_expand = V.transpose(-2, -1).unsqueeze(-2).expand(B, H, E_V, E_K, L)
index_sample = torch.randint(E_V, (E_K, sample_v)) # real U = U_part(factor*ln(L_k))*L_q
V_sample = V_expand[:, :, torch.arange(E_V).unsqueeze(1), index_sample, :]
K_V_sample = torch.matmul(K.transpose(-2, -1).unsqueeze(-2), V_sample.transpose(-2, -1)).squeeze()
# find the Top_k query with sparisty measurement
M = K_V_sample.max(-1)[0] - torch.div(K_V_sample.sum(-1), E_V)
M_top = M.topk(n_top, sorted=False)[1]
# use the reduced Q to calculate Q_K
V_reduce = V.transpose(-2, -1)[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
M_top, :].transpose(-2, -1) # factor*ln(L_q)
K_V = torch.matmul(K.transpose(-2, -1), V_reduce) # factor*ln(L_q)*L_k
#
return K_V, M_top
def _get_initial_context(self, V, L_Q):
B, H, L_V, D = V.shape
if not self.mask_flag:
# V_sum = V.sum(dim=-2)
V_sum = V.mean(dim=-2)
contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
else: # use mask
assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
contex = V.cumsum(dim=-2)
return contex
def _update_context(self, context_in, Q, scores, index, D_K, attn_mask):
B, H, L, D_Q = Q.shape
if self.mask_flag:
attn_mask = self.ProbMask(B, H, D_K, index, scores, device=Q.device)
scores.masked_fill_(attn_mask, -np.inf)
attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
context_in.transpose(-2, -1)[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :] = torch.matmul(Q, attn).type_as(context_in).transpose(-2, -1)
if self.output_attention:
attns = (torch.ones([B, H, D_K, D_K]) / D_K).type_as(attn)
attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
return (context_in, attns)
else:
return (context_in, None)
def forward(self, queries, keys, values, attn_mask):
# B, L_Q, H, D = queries.shape
# _, L_K, _, _ = keys.shape
B, L, H, D_K = keys.shape
_, _, _, D_V = values.shape
queries = queries.transpose(2, 1)
keys = keys.transpose(2, 1)
values = values.transpose(2, 1)
U_part = self.factor * np.ceil(np.log(D_V)).astype('int').item() # c*ln(L_k)
u = self.factor * np.ceil(np.log(D_K)).astype('int').item() # c*ln(L_q)
U_part = U_part if U_part < D_V else D_V
u = u if u < D_K else D_K
scores_top, index = self._prob_KV(keys, values, sample_v=U_part, n_top=u)
# add scale factor
scale = self.scale or 1. / sqrt(D_K)
if scale is not None:
scores_top = scores_top * scale
# get the context
context = self._get_initial_context(queries, L)
# update the context with selected top_k queries
context, attn = self._update_context(context, queries, scores_top, index, D_K, attn_mask)
return context.contiguous(), attn
class AttentionBlock(nn.Module):
def __init__(self, d_model, n_model, n_heads=8, d_keys=None, d_values=None):
super(AttentionBlock, self).__init__()
d_keys = d_keys or (d_model // n_heads)
d_values = d_values or (d_model // n_heads)
self.inner_attention = FullAttention()
# self.inner_attention = ProbAttention(device=device)
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Linear(d_model, d_values * n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
def forward(self, queries, keys, values, attn_mask):
'''
Q: [batch_size, len_q, d_k]
K: [batch_size, len_k, d_k]
V: [batch_size, len_v(=len_k), d_v]
attn_mask: [batch_size, seq_len, seq_len]
'''
batch_size, len_q, _ = queries.shape
_, len_k, _ = keys.shape
queries = self.query_projection(queries).view(batch_size, len_q, self.n_heads, -1)
keys = self.key_projection(keys).view(batch_size, len_k, self.n_heads, -1)
values = self.value_projection(values).view(batch_size, len_k, self.n_heads, -1)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
out = out.view(batch_size, len_q, -1)
out = self.out_projection(out)
out = self.layer_norm(out)
return out, attn
class GRULayer(nn.Module):
"""Gated Recurrent Unit (GRU) Layer
:param in_dim: number of input features
:param hid_dim: hidden size of the GRU
:param n_layers: number of layers in GRU
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(GRULayer, self).__init__()
self.hid_dim = hid_dim
self.n_layers = n_layers
self.dropout = 0.0 if n_layers == 1 else dropout
self.gru = nn.GRU(in_dim, hid_dim, num_layers=n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
out, h = self.gru(x)
out, h = out[-1, :, :], h[-1, :, :] # Extracting from last layer
return out, h
class RNNDecoder(nn.Module):
"""GRU-based Decoder network that converts latent vector into output
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, n_layers, dropout):
super(RNNDecoder, self).__init__()
self.in_dim = in_dim
self.dropout = 0.0 if n_layers == 1 else dropout
self.rnn = nn.GRU(in_dim, hid_dim, n_layers, batch_first=True, dropout=self.dropout)
def forward(self, x):
decoder_out, _ = self.rnn(x)
return decoder_out
class ReconstructionModel(nn.Module):
"""Reconstruction Model
:param window_size: length of the input sequence
:param in_dim: number of input features
:param n_layers: number of layers in RNN
:param hid_dim: hidden size of the RNN
:param in_dim: number of output features
:param dropout: dropout rate
"""
def __init__(self, window_size, in_dim, hid_dim, out_dim, n_layers, dropout):
super(ReconstructionModel, self).__init__()
self.window_size = window_size
self.decoder = RNNDecoder(in_dim, hid_dim, n_layers, dropout)
self.fc = nn.Linear(hid_dim, out_dim)
def forward(self, x):
# x will be last hidden state of the GRU layer
h_end = x
h_end_rep = h_end.repeat_interleave(self.window_size, dim=1).view(x.size(0), self.window_size, -1)
decoder_out = self.decoder(h_end_rep)
out = self.fc(decoder_out)
return out
class Forecasting_Model(nn.Module):
"""Forecasting model (fully-connected network)
:param in_dim: number of input features
:param hid_dim: hidden size of the FC network
:param out_dim: number of output features
:param n_layers: number of FC layers
:param dropout: dropout rate
"""
def __init__(self, in_dim, hid_dim, out_dim, n_layers, dropout):
super(Forecasting_Model, self).__init__()
layers = [nn.Linear(in_dim, hid_dim)]
for _ in range(n_layers - 1):
layers.append(nn.Linear(hid_dim, hid_dim))
layers.append(nn.Linear(hid_dim, out_dim))
self.layers = nn.ModuleList(layers)
self.dropout = nn.Dropout(dropout)
self.relu = nn.ReLU()
def forward(self, x):
for i in range(len(self.layers) - 1):
x = self.relu(self.layers[i](x))
x = self.dropout(x)
return self.layers[-1](x)
class Model(nn.Module):
""" MTAD_GAT model class.
:param n_features: Number of input features
:param window_size: Length of the input sequence
:param out_dim: Number of features to output
:param kernel_size: size of kernel to use in the 1-D convolution
:param feat_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in feat-oriented GAT layer
:param time_gat_embed_dim: embedding dimension (output dimension of linear transformation)
in time-oriented GAT layer
:param use_gatv2: whether to use the modified attention mechanism of GATv2 instead of standard GAT
:param gru_n_layers: number of layers in the GRU layer
:param gru_hid_dim: hidden dimension in the GRU layer
:param forecast_n_layers: number of layers in the FC-based Forecasting Model
:param forecast_hid_dim: hidden dimension in the FC-based Forecasting Model
:param recon_n_layers: number of layers in the GRU-based Reconstruction Model
:param recon_hid_dim: hidden dimension in the GRU-based Reconstruction Model
:param dropout: dropout rate
    :param alpha: negative slope used in the leaky ReLU activation function
"""
def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
super(Model, self).__init__()
n_features = dataloader.dataset.train_inputs.shape[-1]
window_size = int(customs["input_size"])
out_dim = n_features
kernel_size = 7
feat_gat_embed_dim = None
time_gat_embed_dim = None
use_gatv2 = True
gru_n_layers = 1
gru_hid_dim = 150
forecast_n_layers = 1
forecast_hid_dim = 150
recon_n_layers = 1
recon_hid_dim = 150
dropout = 0.2
alpha = 0.2
optimize = True
self.name = "MtadGatAtt"
self.optimize = optimize
use_softmax = not optimize
self.conv = ConvLayer(n_features, kernel_size)
self.feature_gat = FeatureAttentionLayer(
n_features, window_size, dropout, alpha, feat_gat_embed_dim, use_gatv2, use_softmax=use_softmax)
self.temporal_gat = TemporalAttentionLayer(n_features, window_size, dropout, alpha, time_gat_embed_dim,
use_gatv2, use_softmax=use_softmax)
self.forecasting_model = Forecasting_Model(
gru_hid_dim, forecast_hid_dim, out_dim, forecast_n_layers, dropout)
if optimize:
self.encode = AttentionBlock(3 * n_features, window_size)
self.encode_feature = nn.Linear(3 * n_features * window_size, gru_hid_dim)
self.decode_feature = nn.Linear(gru_hid_dim, n_features * window_size)
self.decode = AttentionBlock(n_features, window_size)
else:
self.gru = GRULayer(3 * n_features, gru_hid_dim, gru_n_layers, dropout)
self.recon_model = ReconstructionModel(window_size, gru_hid_dim, recon_hid_dim, out_dim, recon_n_layers,
dropout)
def forward(self, x):
x = self.conv(x)
h_feat = self.feature_gat(x)
h_temp = self.temporal_gat(x)
h_cat = torch.cat([x, h_feat, h_temp], dim=2) # (b, n, 3k)
if self.optimize:
h_end, _ = self.encode(h_cat, h_cat, h_cat, None)
h_end = self.encode_feature(h_end.reshape(h_end.size(0), -1))
else:
_, h_end = self.gru(h_cat)
h_end = h_end.view(x.shape[0], -1) # Hidden state for last timestamp
predictions = self.forecasting_model(h_end)
if self.optimize:
h_end = self.decode_feature(h_end)
h_end = h_end.reshape(x.shape[0], x.shape[1], x.shape[2])
recons, _ = self.decode(h_end, h_end, h_end, None)
else:
recons = self.recon_model(h_end)
return predictions, recons
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
if preds.ndim == 3:
preds = preds.squeeze(1)
if y_true.ndim == 3:
y_true = y_true.squeeze(1)
forecast_criterion = nn.MSELoss()
recon_criterion = nn.MSELoss()
forecast_loss = torch.sqrt(forecast_criterion(y_true, preds))
recon_loss = torch.sqrt(recon_criterion(x, recons))
loss = forecast_loss + recon_loss
loss.backward()
return loss.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
preds, recons = self.forward(x)
score = F.pairwise_distance(recons.reshape(recons.size(0), -1), x.reshape(x.size(0), -1)) + F.pairwise_distance(y_true.reshape(y_true.size(0), -1), preds.reshape(preds.size(0), -1))
return score, None
if __name__ == "__main__":
    # Quick forward-pass speed check. The original snippet used an older constructor signature
    # (n_features, window_size, out_dim, optimize, device); the current Model takes
    # (customs, dataloader), so a dummy dataset supplies the feature count here. The `optimize`
    # flag is now fixed inside __init__, so only that configuration is timed.
    from tqdm import tqdm
    import time
    class _DummyDataset(tud.Dataset):
        def __init__(self, window, features, n=8):
            self.train_inputs = torch.ones((n, window, features))
        def __len__(self):
            return self.train_inputs.shape[0]
        def __getitem__(self, idx):
            return self.train_inputs[idx]
    epoch = 10000
    batch_size = 1
    # device = 'cuda:1' if torch.cuda.is_available() else 'cpu'
    device = 'cpu'
    n_features = 52
    input_len_list = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300]
    for input_len in input_len_list:
        loader = tud.DataLoader(_DummyDataset(input_len, n_features), batch_size=batch_size)
        model = Model({"input_size": input_len}, loader).to(device)
        a = torch.ones((batch_size, input_len, n_features)).to(device)
        start = time.time()
        for _ in tqdm(range(epoch)):
            model(a)
        end = time.time()
        print(input_len, batch_size * epoch / (end - start))

3
method/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
from .MtadGat import Model
from .MtadGatAtt import Model
from .AnomalyTransformer import Model

50
method/template.py Normal file
View File

@@ -0,0 +1,50 @@
import torch.nn as nn
import torch.utils.data as tud
import torch
class Model(nn.Module):
def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
"""
        :param customs: Custom parameters, taken from the [CustomParameters] section of config.ini.
        :param dataloader: The dataloader built from the initialized dataset. Attributes or methods added in a custom
                           preprocessing file can be exposed to the model here. For example, if the model needs the
                           number of feature dimensions, use n_features = dataloader.dataset.train_inputs.shape[-1],
                           or define self.n_features = self.train_inputs.shape[-1] in the MyDataset class of the
                           preprocessing method and read it via n_features = dataloader.dataset.n_features.
"""
super(Model, self).__init__()
def forward(self, x):
"""
        :param x: Model input; in this tool it is the first of the three variables returned by __getitem__ of the MyDataset class.
        :return: Model output, which can be customized.
"""
return None
def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
"""
        Compute the loss. Note: using libraries other than torch here will break the gradient graph, so use torch operations only.
        :param x: Input data.
        :param y_true: Ground-truth output data.
        :param epoch: Current epoch number.
        :param device: Device, cpu or cuda.
        :return: Loss value.
"""
        y_pred = self.forward(x)  # model output
        loss = torch.Tensor([1])  # placeholder, replace with your own loss
loss.backward()
return loss.item()
def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
"""
        Detection method; it may output either anomaly scores or concrete labels.
        If anomaly scores are returned, a threshold is selected automatically afterwards and points above it are
        labelled anomalous; if labels are returned, they are evaluated directly.
        :param x: Input data.
        :param y_true: Ground-truth output data.
        :param epoch: Current epoch number.
        :param device: Device, cpu or cuda.
        :return: score, label. To output anomaly scores, return score and set label to None; to output labels,
                 return label and set score to None. score is a torch Tensor of shape [batch_size];
                 label is a torch IntTensor of shape [batch_size].
"""
        y_pred = self.forward(x)  # model output
return None, None
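# Illustrative sketch (not part of the framework): a minimal concrete model following the interface
# documented above. It reconstructs the input window with a single linear layer and uses the
# per-window reconstruction error as the anomaly score; input shape is assumed to be [B, L, D].
class ExampleLinearModel(nn.Module):
    def __init__(self, customs: dict, dataloader: tud.DataLoader = None):
        super(ExampleLinearModel, self).__init__()
        window = int(customs["input_size"])
        n_features = dataloader.dataset.train_inputs.shape[-1]
        self.name = "ExampleLinearModel"
        self.linear = nn.Linear(window * n_features, window * n_features)
    def forward(self, x):
        b, l, d = x.shape
        return self.linear(x.reshape(b, -1)).reshape(b, l, d)
    def loss(self, x, y_true, epoch: int = None, device: str = "cpu"):
        recon = self.forward(x)
        loss = nn.MSELoss()(recon, x)  # pure reconstruction objective; y_true is not used here
        loss.backward()
        return loss.item()
    def detection(self, x, y_true, epoch: int = None, device: str = "cpu"):
        recon = self.forward(x)
        score = torch.mean((recon - x) ** 2, dim=(1, 2))  # one score per window, shape [batch_size]
        return score, None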

1
preprocess/__init__.py Normal file
View File

@@ -0,0 +1 @@
from .standardization import MyDataset

119
preprocess/standardization.py Normal file
View File

@@ -0,0 +1,119 @@
from torch.utils.data import Dataset
from torch import float32, Tensor
from numpy import array, where
class MyDataset(Dataset):
def __init__(self, name: str, train_path: str = None, test_path: str = None, input_size: int = 1,
output_size: int = 1, step: int = 1, mode: str = 'train', time_index: bool = True,
del_column_name: bool = True):
"""
        Converts csv files into tensors in batches.
        :param name: Dataset name.
        :param train_path: Path to the training set.
        :param test_path: Path to the test set.
        :param input_size: Length of the input window.
        :param output_size: Length of the output window.
        :param step: Stride of the sliding window.
        :param mode: 'train' or 'test', selecting which split is served.
        :param time_index: True if the first column is a timestamp, False otherwise.
        :param del_column_name: Use True when the first row of the file contains column names.
"""
self.name = name
self.input_size = input_size
self.output_size = output_size
self.del_column_name = del_column_name
self.step = step
self.mode = mode
self.time_index = time_index
self.train_inputs, self.train_labels, self.train_outputs, self.test_inputs, self.test_labels, self.test_outputs\
= self.parse_data(train_path, test_path)
self.train_inputs = Tensor(self.train_inputs).to(float32) if self.train_inputs is not None else None
self.train_labels = Tensor(self.train_labels).to(float32) if self.train_labels is not None else None
self.train_outputs = Tensor(self.train_outputs).to(float32) if self.train_outputs is not None else None
self.test_inputs = Tensor(self.test_inputs).to(float32) if self.test_inputs is not None else None
self.test_labels = Tensor(self.test_labels).to(float32) if self.test_labels is not None else None
self.test_outputs = Tensor(self.test_outputs).to(float32) if self.test_outputs is not None else None
def parse_data(self, train_path: str = None, test_path: str = None):
if train_path is None and test_path is None:
raise ValueError("train_path is None and test_path is None.")
mean = None
deviation = None
train_data_input, train_label, train_data_output = None, None, None
test_data_input, test_label, test_data_output = None, None, None
        # Read the training data
if train_path:
train_data = []
train_label = []
with open(train_path, 'r', encoding='utf8') as f:
if self.del_column_name is True:
data = f.readlines()[1:]
else:
data = f.readlines()
train_data.extend([list(map(float, line.strip().split(','))) for line in data])
train_label.extend([0 for _ in data])
train_np = array(train_data)
if self.time_index:
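                # Fold the timestamp into seconds-of-day (mod 86400) so time becomes a daily-periodic
                # feature that is standardised like the other columns.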
train_np[:, 0] = train_np[:, 0] % 86400
            mean = train_np.mean(axis=0)  # compute the mean of each column
            deviation = train_np.std(axis=0)  # compute the standard deviation of each column
            deviation = where(deviation != 0, deviation, 1)
            train_np = (train_np - mean) / deviation  # standardize
train_data = train_np.tolist()
train_data_input, train_data_output, train_label = self.cut_data(train_data, train_label)
        # Read the test data
if test_path:
test_data = []
test_label = []
with open(test_path, 'r', encoding='utf8') as f:
if self.del_column_name is True:
data = f.readlines()[1:]
else:
data = f.readlines()
test_data.extend([list(map(float, line.strip().split(',')))[:-1] for line in data])
test_label.extend([int(line.strip().split(',')[-1]) for line in data])
test_np = array(test_data)
if self.time_index:
test_np[:, 0] = test_np[:, 0] % 86400
            # mean = test_np.mean(axis=0)  # compute the mean
            # deviation = test_np.std(axis=0)  # compute the standard deviation
            # deviation = where(deviation != 0, deviation, 1)
            test_np = (test_np - mean) / deviation  # standardize using the training-set statistics
test_data = test_np.tolist()
            # Automatically decide whether the labels need to be flipped. Anomalies are assumed to be
            # labelled 1; when more than half of the labels are 1, the labels are inverted.
if sum(test_label) > 0.5*len(test_label):
test_label = (1-array(test_label)).tolist()
test_data_input, test_data_output, test_label = self.cut_data(test_data, test_label)
return train_data_input, train_label, train_data_output, test_data_input, test_label, test_data_output
def cut_data(self, data: [[float]], label: [int]):
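        # Sliding-window slicing: each window of input_size points becomes one input sample, the
        # following output_size points are its target, the window label is the max of the target
        # labels (anomalous if any target point is anomalous), and the window start advances by
        # `step` each iteration.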
n = 0
input_data, output_data, anomaly_label = [], [], []
while n + self.input_size + self.output_size <= len(data):
input_data.append(data[n: n + self.input_size])
output_data.append(data[n + self.input_size: n + self.input_size + self.output_size])
anomaly_label.append([max(label[n + self.input_size: n + self.input_size + self.output_size])])
n = n + self.step
return input_data.copy(), output_data.copy(), anomaly_label.copy()
def __len__(self):
if self.mode == 'train':
return self.train_inputs.shape[0]
elif self.mode == 'test':
return self.test_inputs.shape[0]
def __getitem__(self, idx):
if self.mode == 'train':
return self.train_inputs[idx], self.train_labels[idx], self.train_outputs[idx]
elif self.mode == 'test':
return self.test_inputs[idx], self.test_labels[idx], self.test_outputs[idx]
if __name__ == "__main__":
    app = MyDataset('SWAT', train_path='../dataset/SWAT/train.csv', test_path='../dataset/SWAT/test.csv', input_size=3)
print(app)

61
preprocess/template.py Normal file
View File

@@ -0,0 +1,61 @@
"""
Template file: implement a custom preprocessing method by editing this file.
When finished, write the new file name into config.ini and add the file to the __init__.py of this directory
(note: utils.create_dataloader instantiates MyDataset from the chosen preprocessing module, so name the dataset class MyDataset).
"""
from torch.utils.data import Dataset
class DataSet(Dataset):
def __init__(self, train_path: str = None, test_path: str = None, input_size: int = 1, output_size: int = 1,
step: int = 1, mode: str = 'train', del_time: bool = True, del_column_name: bool = True,
reverse_label: bool = True):
"""
        Converts csv files into tensors in batches.
        Note: the following variables and methods are required.
        Variables: self.train_inputs, self.train_labels, self.train_outputs,
                   self.test_inputs, self.test_labels, self.test_outputs, self.mode
        Methods: __len__(), __getitem__()
        :param train_path: str. Path to the training set.
        :param test_path: str. Path to the test set.
        :param input_size: int. Length of the input window.
        :param output_size: int. Length of the output window.
        :param step: int. Stride of the sliding window.
        :param mode: str. 'train' or 'test', selecting which split is served.
        :param del_time: bool. True removes the timestamp column, False keeps it.
        :param del_column_name: bool. Use True when the first row of the file contains column names.
        :param reverse_label: bool. Swap labels 0 and 1; the convention is 0 for normal and 1 for anomalous, so use True if the original file does not follow it.
"""
self.mode = mode
        self.train_inputs = None  # training inputs, a Tensor of shape [N, L, D]: N = number of training samples, L = length of each sample (number of time points), D = number of feature dimensions
        self.train_labels = None  # training labels, a Tensor of shape [N, 1]
        self.train_outputs = None  # training targets, a Tensor of shape [N, L, D]
        self.test_inputs = None  # test inputs, a Tensor of shape [N, L, D]
        self.test_labels = None  # test labels, a Tensor of shape [N, 1]
        self.test_outputs = None  # test targets, a Tensor of shape [N, L, D]
def __len__(self):
"""
        Returns the length of the dataset.
        :return: Number of samples N in the training or test split.
"""
if self.mode == 'train':
return self.train_inputs.shape[0]
elif self.mode == 'test':
return self.test_inputs.shape[0]
def __getitem__(self, idx):
"""
        Fetches one sample.
        :param idx: Sample index.
        :return: The corresponding input data, label, and target output data.
"""
if self.mode == 'train':
return self.train_inputs[idx], self.train_labels[idx], self.train_outputs[idx]
elif self.mode == 'test':
return self.test_inputs[idx], self.test_labels[idx], self.test_outputs[idx]
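if __name__ == "__main__":
    # Illustrative smoke test (not part of the framework): fill the required fields with random
    # tensors of the documented shapes [N, L, D] and exercise the Dataset interface.
    import torch
    N, L, D = 16, 30, 8
    ds = DataSet()
    ds.train_inputs = torch.randn(N, L, D)
    ds.train_labels = torch.zeros(N, 1)
    ds.train_outputs = torch.randn(N, L, D)
    print(len(ds), [t.shape for t in ds[0]])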

View File

File diff suppressed because it is too large

8
requirements.txt Normal file
View File

@@ -0,0 +1,8 @@
eventlet==0.33.3
gevent==22.10.2
loguru==0.6.0
numpy==1.21.2
pandas==1.4.1
scikit_learn==1.2.2
torch==1.10.2
tqdm==4.63.1

318
utils.py Normal file
View File

@@ -0,0 +1,318 @@
import os
import numpy
import torch.nn as nn
import torch
import torch.utils.data as tud
import preprocess
import evaluation
from loguru import logger
import time
from tqdm import tqdm
def create_dataloader(dataset_name: str, input_size: int = 1, output_size: int = 1, step: int = 1, batch_size: int = 1,
time_index: bool = True, del_column_name: bool = True,
preprocess_name: str = "standardization") -> (tud.DataLoader, tud.DataLoader):
"""
    Build DataLoaders for a single dataset.
    :param dataset_name: Dataset name.
    :param input_size: Length of the input window.
    :param output_size: Length of the output window.
    :param step: Stride of the sliding window.
    :param batch_size: Batch size.
    :param time_index: True if the first column is a timestamp, False otherwise.
    :param del_column_name: Use True when the first row of the file contains column names.
    :param preprocess_name: Name of the preprocessing method.
    :return: Training DataLoader and test DataLoader.
"""
ds = eval(f"preprocess.{preprocess_name}.MyDataset")(name=dataset_name.replace("/", "-"),
train_path=f"./dataset/{dataset_name}/train.csv",
test_path=f"./dataset/{dataset_name}/test.csv",
input_size=input_size, output_size=output_size,
step=step, time_index=time_index,
del_column_name=del_column_name)
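    # The preprocessing module is resolved dynamically via the eval above: preprocess.{preprocess_name} must be
    # imported in preprocess/__init__.py and must define a class named MyDataset (see preprocess/template.py).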
normal_dl = tud.DataLoader(dataset=ds, batch_size=batch_size, shuffle=True)
ds.mode = "test"
attack_dl = tud.DataLoader(dataset=ds, batch_size=batch_size, shuffle=False)
return normal_dl, attack_dl
def create_all_dataloader(datasets: [str], input_size: int = 1, output_size: int = 1, step: int = 1,
batch_size: int = 1, time_index: bool = True, del_column_name: bool = True,
preprocess_name: str = "standardization") -> [{}]:
"""
    Build DataLoaders for all datasets.
    :param datasets: List of dataset names.
    :param input_size: Length of the input window.
    :param output_size: Length of the output window.
    :param step: Stride of the sliding window.
    :param batch_size: Batch size.
    :param time_index: True if the first column is a timestamp, False otherwise.
    :param del_column_name: Use True when the first row of the file contains column names.
    :param preprocess_name: Name of the preprocessing method.
    :return: The DataLoaders built for every dataset.
"""
all_dataloader = []
for dataset_name in datasets:
        logger.info(f'Start building dataloader {dataset_name}')
if "train.csv" in os.listdir(f"./dataset/{dataset_name}"):
normal_dl, attack_dl = create_dataloader(dataset_name=dataset_name, input_size=input_size,
output_size=output_size, step=step, batch_size=batch_size,
time_index=time_index, del_column_name=del_column_name,
preprocess_name=preprocess_name)
all_dataloader.append([{
'dataset_name': dataset_name,
'normal': normal_dl,
'attack': attack_dl
}])
else:
all_sub_dataloader = []
for sub_dataset_dir in os.listdir(f"./dataset/{dataset_name}"):
sub_dataset_name = f"{dataset_name}/{sub_dataset_dir}"
                normal_dl, attack_dl = create_dataloader(dataset_name=sub_dataset_name, input_size=input_size,
                                                         output_size=output_size, step=step, batch_size=batch_size,
                                                         time_index=time_index, del_column_name=del_column_name,
                                                         preprocess_name=preprocess_name)
all_sub_dataloader.append({
'dataset_name': sub_dataset_name.replace("/", "-"),
'normal': normal_dl,
'attack': attack_dl
})
all_dataloader.append(all_sub_dataloader)
        logger.info(f'Finished building dataloader {dataset_name}')
return all_dataloader
class EvaluationScore:
def __init__(self, evaluations: [str], attack=1):
"""
        Automatically selects thresholds and runs batch evaluation.
        :param evaluations: Names of the evaluation methods to use; each must be defined in the evaluation folder.
        :param attack: Label value used for anomalies, 0 or 1.
"""
self.time = 0
self.f1 = 0
self.f1_pa = 0
self.f_tad = 0
self.attack = attack
self.normal = 1 - attack
self._total_y_loss = None
self._total_label = None
self._total_pred_label = None
self.true_pred_df = None
self.true_pred_dict = None
self.evaluations = evaluations
self.scores = {}
def add_data(self, y_loss, true_label, pred_label=None):
"""
        Add the data of one batch.
        :param y_loss: Anomaly score (data deviation) of the batch.
        :param true_label: Ground-truth labels.
        :param pred_label: Predicted labels.
"""
if pred_label is not None:
if self._total_label is None and self._total_pred_label is None:
self._total_label = true_label
self._total_pred_label = pred_label
else:
self._total_label = torch.cat([self._total_label, true_label], dim=0)
self._total_pred_label = torch.cat([self._total_pred_label, pred_label], dim=0)
return
y_loss = y_loss.view(-1).cpu().detach().numpy()
true_label = true_label.view(-1).cpu().detach().numpy()
if self._total_y_loss is None and self._total_label is None:
self._total_y_loss = y_loss
self._total_label = true_label
return
self._total_y_loss = numpy.concatenate((self._total_y_loss, y_loss), axis=0)
self._total_label = numpy.concatenate((self._total_label, true_label), axis=0)
def best_threshold(self, true_label: list, y_loss: list) -> dict:
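        # Coarse-to-fine threshold search: for each evaluation metric, run 5 rounds of an 11-point grid
        # between the current bounds, keep the candidate with the best score, and narrow the interval
        # around it for the next round.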
ret = {}
for func_name in self.evaluations:
threshold_max = max(y_loss)
threshold_min = 0
best_threshold = 0
for _ in range(5):
threshold_list = [threshold_max - i * (threshold_max - threshold_min) / 10 for i in range(11)]
f1_list = []
for threshold_one in threshold_list:
prediction_loss = numpy.where(numpy.array(y_loss) > threshold_one, self.attack, self.normal)
f1 = eval(f"evaluation.{func_name}.evaluate")(y_true=true_label, y_pred=prediction_loss.tolist())
f1_list.append(f1)
ind = f1_list.index(max(f1_list))
best_threshold = threshold_list[ind]
if ind == 0:
threshold_max = threshold_list[ind]
threshold_min = threshold_list[ind+1]
elif ind == len(threshold_list)-1:
threshold_max = threshold_list[ind-1]
threshold_min = threshold_list[ind]
else:
threshold_max = threshold_list[ind-1]
threshold_min = threshold_list[ind+1]
ret[func_name] = best_threshold
return ret
def auto_threshold(self):
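        # Zero out NaN scores, normalise the scores to [0, 1], search a per-metric threshold, and convert
        # the continuous scores into 0/1 predictions for each evaluation method (skipped when the model
        # already returned labels).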
if self._total_pred_label is not None:
return
self._total_y_loss[numpy.isnan(self._total_y_loss)] = 0
self._total_y_loss = self._total_y_loss / max(self._total_y_loss)
thresholds = self.best_threshold(
self._total_label.reshape(-1).data.tolist(), self._total_y_loss.reshape(-1).data.tolist())
self.true_pred_dict = {
'true': self._total_label.squeeze().tolist()
}
for func_name in thresholds:
self.true_pred_dict[func_name] = \
numpy.where(self._total_y_loss > thresholds[func_name], self.attack, self.normal).squeeze().tolist()
# self.true_pred_df = pandas.DataFrame(self.true_pred_dict)
for func_name in self.true_pred_dict:
if func_name == "true":
continue
self.scores[func_name] = self.get_score(func_name)
def get_score(self, func_name):
if self._total_pred_label is not None:
return eval(f"evaluation.{func_name}.evaluate")(self._total_label.reshape(-1).tolist(),
self._total_pred_label.reshape(-1).tolist())
return eval(f"evaluation.{func_name}.evaluate")(self._total_label.reshape(-1).tolist(),
self.true_pred_dict[f"{func_name}"])
def __str__(self):
res = ""
for func_name in self.scores:
res += f"{func_name}={self.scores[func_name]:.3f} "
return res[:-1]
def train_model(epoch: int, optimizer: torch.optim, dataloader: tud.DataLoader, model: nn.Module,
device: str = "cpu") -> (nn.Module, str):
"""
    Train the model for one epoch.
    :param epoch: Current epoch number.
    :param optimizer: Optimizer.
    :param dataloader: Training DataLoader.
    :param model: The model.
    :param device: Training device, cpu or gpu.
    :return: The trained model and the message to log after training.
"""
model.train()
avg_loss = []
dataloader.dataset.mode = "train"
start_time = time.time()
with tqdm(total=len(dataloader), ncols=150) as _tqdm:
        _tqdm.set_description(f'(progress bar is not written to the local log) epoch:{epoch}, training progress')
for data in dataloader:
x = data[0].to(device)
y_true = data[2].to(device)
optimizer.zero_grad()
loss = model.loss(x=x, y_true=y_true, epoch=epoch, device=device)
avg_loss.append(loss)
optimizer.step()
_tqdm.set_postfix(loss='{:.6f}'.format(sum(avg_loss) / len(avg_loss)))
_tqdm.update(1)
end_time = time.time()
info = f"epoch={epoch}, average loss={'{:.6f}'.format(sum(avg_loss) / len(avg_loss))}, " \
f"train time={'{:.1f}'.format(end_time-start_time)}s"
return model, info
def test_model(dataloader: tud.DataLoader, model: nn.Module, evaluations: [str], device: str = "cpu") -> \
(EvaluationScore, str):
"""
    Test the model.
    :param dataloader: Test DataLoader.
    :param model: The model.
    :param evaluations: Names of the evaluation methods to use.
    :param device: Device, cpu or gpu.
    :return: Evaluation scores and the message to log after testing.
"""
es = EvaluationScore(evaluations)
model.eval()
dataloader.dataset.mode = "test"
start_time = time.time()
with tqdm(total=len(dataloader), ncols=150) as _tqdm:
        _tqdm.set_description('(progress bar is not written to the local log) testing progress')
with torch.no_grad():
for data in dataloader:
x = data[0].to(device)
y_true = data[2].to(device)
label_true = data[1].int().to(device)
y_loss, label_pred = model.detection(x=x, y_true=y_true, device=device)
if label_pred is not None:
es.add_data(y_loss=None, true_label=label_true, pred_label=label_pred)
else:
es.add_data(y_loss=y_loss, true_label=label_true, pred_label=None)
_tqdm.update(1)
end_time = time.time()
es.auto_threshold()
es_score = es.__str__().replace(" ", ", ")
info = f"{es_score}, test time={'{:.1f}'.format(end_time-start_time)}s"
return es, info
def train_and_test_model(start_time: str, epochs: int, normal_dataloader: tud.DataLoader, attack_dataloader: tud.DataLoader,
model: nn.Module, evaluations: [str], device: str = "cpu", lr: float = 1e-4,
model_path: str = None, train: bool = True) -> (dict, dict):
"""
    Train and test a model on one dataset.
    :param start_time: Start time of the experiment, used to locate the output directory.
    :param epochs: Total number of training epochs.
    :param normal_dataloader: Training DataLoader.
    :param attack_dataloader: Test DataLoader.
    :param model: The model.
    :param evaluations: Evaluation methods.
    :param device: Device.
    :param lr: Learning rate.
    :param model_path: Path to a saved model checkpoint.
    :param train: Whether to train; if False, only testing is performed.
    :return: Best score of each evaluation method, and the predicted labels at the best score of each method.
"""
dataset_name = normal_dataloader.dataset.name
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
if model_path:
try:
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            logger.info(f"Model checkpoint {model_path} loaded successfully")
        except Exception:
            logger.warning(f"Failed to load model checkpoint {model_path}; a new model will be trained")
    logger.info(f"Model: {model.name}, dataset: {dataset_name}, device: {device}, starting")
best_score = {}
best_detection = {}
if train:
        logger.info("Mode: train and test")
for epoch in range(1, epochs+1):
model, train_info = train_model(epoch=epoch, optimizer=optimizer, dataloader=normal_dataloader, model=model,
device=device)
es, test_info = test_model(dataloader=attack_dataloader, model=model, evaluations=evaluations,
device=device)
logger.info(f"{train_info}, {test_info}")
es_score = es.__str__().replace(" ", "_")
torch.save({
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict()
}, f'./records/{start_time}/model/model={model.name}_dataset={dataset_name}_epoch={epoch}_{es_score}.pth')
for func_name in es.scores:
if func_name not in best_score or es.scores[func_name] > best_score[func_name]:
best_score[func_name] = es.scores[func_name]
best_detection[func_name] = es.true_pred_dict[func_name]
best_detection["true"] = es.true_pred_dict["true"]
else:
        logger.info("Mode: test only")
es, test_info = test_model(dataloader=attack_dataloader, model=model, evaluations=evaluations, device=device)
logger.info(test_info)
for func_name in es.scores:
best_score[func_name] = es.scores[func_name]
best_detection[func_name] = es.true_pred_dict[func_name]
best_detection["true"] = es.true_pred_dict["true"]
return best_score, best_detection