% Conference paper in the ACAIT 2025 proceedings (IEEE).
% The citation key is kept unchanged so existing citations keep resolving.
% Review notes:
%   - Author names are stored in "Last, First" form; the given name
%     "Bryan W." no longer carries escaped braces, which would have been
%     typeset as literal brace characters.
%   - "series" was dropped because it only repeated "booktitle" verbatim.
%   - "address" holds a country rather than a publisher city, and "year" is
%     later than the conference date given in "note"; both are kept as
%     exported. TODO confirm against the publisher record.
@inproceedings{2f45ae12051646ac9ae21bc236781e13,
  author    = {Sun, Yi and Scotney, Bryan W. and Nie, Xiushan and Zhang, Shuai and Liu, Xingbo and Qiu, Lanting},
  title     = {Unsupervised Video Anomaly Detection with {Swin} {Transformer} and Temporal-Context Modeling},
  booktitle = {2025 Asian Conference on Artificial Intelligence Technology ({ACAIT})},
  pages     = {1468--1472},
  publisher = {IEEE},
  address   = {United States},
  year      = {2026},
  month     = may,
  day       = {20},
  doi       = {10.1109/acait67930.2025.11521860},
  isbn      = {979-8-3315-8788-8},
  language  = {English},
  keywords  = {unsupervised learning, Swin Transformer, Spatio-Temporal Modeling, multi-scale representation},
  abstract  = {Video anomaly detection has broad applications in public safety, health monitoring, and emergency response. Existing transformer-based methods often struggle to capture multi-scale spatiotemporal patterns effectively. In this paper, we propose an unsupervised video anomaly detection framework that enhances feature representation by combining a 3D Swin Transformer with temporal shift modules and dynamic large kernel (DLK) convolutions. The 3D encoder-decoder structure models normal behavior from video sequences, and anomalies are identified through reconstruction errors. We validate our method on three public datasets: ShanghaiTech, Avenue, and Ped2. Our model achieves competitive accuracy, demonstrating improved capacity for capturing temporal dynamics and contextual details.},
  note      = {2025 Asian Conference on Artificial Intelligence Technology (ACAIT) ; Conference date: 12-09-2025 Through 14-09-2025},
}