@mastersthesis{Babu2023,
  author   = {Sandeep Babu},
  title    = {Real-Time Semantic Aided Place Recognition for Loop Closure Detection},
  month    = {June},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {A SLAM system requires effective loop closure detection to reduce errors in the map and to create a consistent map. Currently, the GET Lab uses a loop closure detection approach that encodes low-level features as global descriptors to represent scenes. Due to the difficulty of generating effective descriptors that are robust to occlusion and viewpoint changes, place recognition for 3D point clouds remains an open issue. Inspired by humans, who recognize scenes by identifying semantic objects and capturing their relations, this study explores the use of high-level features, namely semantics, to improve the descriptor's capability to represent scenes. It further proposes a semantic graph representation that encodes the semantic and topological information of the raw point cloud. Place recognition is modeled as a graph matching problem, and a graph similarity network is used to compute the similarity. The new method should run in real time and be integrable into the existing SLAM system. The approach will be evaluated in real-world indoor and large urban outdoor environments at Paderborn University.}
}

@inproceedings{BJFM2023,
  author    = {Sandeep Babu and Majid Jegarian and Dirk Fischer and B{\"a}rbel Mertsching},
  title     = {Fast 3D Semantic Segmentation Using a Self Attention Network and Random Sampling},
  month     = {July},
  year      = {2023},
  booktitle = {Towards Autonomous Robotic Systems},
  pages     = {255--266},
  publisher = {Springer Nature Switzerland},
  isbn      = {978-3-031-43360-3},
  abstract  = {For many use cases, reliable autonomous behavior of mobile robots can only be achieved if semantic information about the environment is available together with a topological map. However, current techniques either rely on costly sampling methods or involve computationally heavy pre- or post-processing steps, making them unsuitable for real-time systems with limited resources. In this paper, we propose an optimized approach for 3D point cloud processing that uses a self-attention network combined with random sampling to directly infer the semantics of individual 3D points. The approach achieves competitive results on large-scale point cloud datasets, including SemanticKITTI and S3DIS.}
}

@mastersthesis{Charania2023,
  author   = {Sarim Charania},
  title    = {Robot Arm Control Using Hand-eye Coordination Based on Object Tracking},
  month    = {May},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {Robot dexterity is the ability of a robot to handle and manipulate objects in its environment. Its use cases can be found in multiple domains of robotics, ranging from household robots to industrial ones. Handling objects with precision requires the robot not only to accurately identify their pose, but also to ensure that the manipulator can execute the planned path to the object with high precision. This can be challenging because a manipulator's ability to reach a desired position can be affected by errors in mechanical components, such as backlash in gears. To overcome this, the robot must identify and correct the difference between its end effector's intended and actual positions during execution. This becomes even more challenging when operating in unknown environments.
Inspired by how humans perform handling tasks, the goal is to develop a closed-loop hand-eye coordination system that relies on the relative poses of the perceived object and the end effector to guide the arm.}
}

@mastersthesis{Leineke2022,
  author   = {Marc Leineke},
  title    = {Analyse aktueller Verfahren zur Langzeit-Objektverfolgung mit einmaliger Initialisierung},
  month    = {June},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {An influential object tracking method is the Tracking-Learning-Detection (TLD) approach by Kalal et al. from 2010, which can be characterized as an online long-term tracker with one-time initialization. Through the clever combination of its three components, tracker, learner, and detector, this method works extremely reliably. The detector can reinitialize the tracker if the tracker loses the object. The learner uses the so-called P-N algorithm to build a view-based object representation in the form of positive and negative examples and thereby systematically extend the detector's object model. However, newer methods outperform the TLD approach, particularly with respect to tracking accuracy. In recent years, more and more deep learning-based methods have also been developed. Through a targeted comparison and systematic tests of different methods, this thesis investigates which properties and criteria make an online object tracking method particularly robust for long-term tracking. To this end, TLD-like methods are identified and systematically analyzed.}
}

@mastersthesis{Mazumder2023,
  author   = {Kanak Mazumder},
  title    = {Surfel based Semantic Maps for Exploration in Dynamic Environments},
  month    = {March},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {Mapping and localization are an integral part of any autonomous mobile robot application. However, a bare point cloud map only contains geometric information, which is insufficient for making informed decisions in high-level tasks. Moreover, most SLAM systems inherently assume static environments and fail to maintain a consistent model of the environment if it changes over time. In this thesis, a 3D semantic mapping system is developed that can create a consistent model of a dynamic environment based on an adaptive surfel-based representation. Using a SLAM system to estimate camera poses, the developed system can fuse color and depth images into a surfel-based map in which each surfel represents the geometry and semantics of a surface patch. If a part of the environment changes, the surfels representing that area also change to match the map with the real scene. The system reduces the semantic segmentation error using a flood-fill refinement approach and superpixel-based local neighborhood statistics. Semantic knowledge from multiple observations is fused using a recursive Bayesian scheme. The semantic map is extended autonomously by exploring the environment based on incremental processing of the semantic and geometric information stored in the surfels.
}
}

@mastersthesis{Mourach2023,
  author   = {Khalid Mourach},
  title    = {Semantic Image Segmentation for Search-and-Rescue Scenarios using Deep Learning and Attention Mechanisms},
  month    = {November},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {The importance of incorporating robots into search-and-rescue missions has been on the rise. Nevertheless, their effectiveness heavily depends on their capability to process visual information. This work investigates the potential of deep learning architectures that integrate attention mechanisms to enhance semantic image segmentation for search-and-rescue scenarios. Attention mechanisms in transformer architectures are explored, as they have demonstrated remarkable performance in natural language processing and computer vision by capturing rich contextual information. Since no existing segmentation dataset aligns with the objectives of this work, image classification datasets for disaster response can be labeled for training and testing purposes. Among other candidate architectures, SegFormer in particular is considered for implementation due to its success in achieving state-of-the-art results while remaining computationally efficient. For implementation and training, the well-established PyTorch library is used. After training, the results are evaluated with regard to generalization capability and, in particular, the integration and effects of the attention mechanisms.}
}

@mastersthesis{Neugebauer2022,
  author   = {Sarah Neugebauer},
  title    = {Extraktion und Vorverarbeitung von Objektkonturen aus Regionensegmentierungen},
  month    = {August},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {In this bachelor's thesis, a method for extracting and preprocessing object contours from region-based image segmentations is to be implemented and applied. Preprocessing includes, in particular, reading in the contours in an ordered fashion. The resulting contours can be used, for example, to create a training dataset for the detection of keypoints along object contours. These keypoints are typically curvature extrema, which can be used, for instance, for object recognition. To obtain the object contours, real images are first segmented using a selected method; Selective Search and deep learning-based methods are considered for this purpose. Ideally, an existing dataset of segmented images can be used. To obtain contours that are as versatile as possible, objects of different scales are to be taken into account. One possible approach is, for example, to first isolate individual regions and then trace their boundaries. To read in the contours in an ordered sequence, an existing method from the GET Lab for analyzing edge images can be used, among others.}
}

@mastersthesis{Patel2023,
  author   = {Jahnavi Kiran Patel},
  title    = {Autonomous Navigation in Dynamic Environments using Sensor-Fusion based Multiple Object Tracking},
  month    = {November},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {To navigate safely through crowded dynamic environments, an autonomous system must analyze the behavior of surrounding objects and incorporate it into the actions taken.
The GET Lab currently employs a trajectory planner in conjunction with a reactive obstacle avoidance method; however, the motion of dynamic objects is not taken into consideration. To address this, Multiple Object Tracking (MOT) is employed in this thesis to interpret the motion patterns of obstacles around the ego system. Tracking is performed on the result of a sensor fusion of camera and lidar data. The motion analysis is used to predict possible future positions of the objects. Potentially safe or unsafe areas in the robot's environment are modeled as a risk map by encoding the patterns and predictions. The risk map is integrated into the combination of a local and a global path planner. This enables the robot to take early preventive measures while executing control commands. The developed system operates as a modular pipeline and is real-time capable. For evaluation, the system is tested on public datasets and simulated test scenarios.}
}

@mastersthesis{Poo2021,
  author   = {Dheeraj Rajashekar Poolavaram},
  title    = {Deep Learning based Scale Estimation for Local Image Features},
  month    = {January},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {The objective of this work is to develop a deep learning-based framework to estimate the characteristic scales of local image features. The characteristic scale describes the size of a feature and defines the region used to form the corresponding feature vector. Feature vectors are used as a basis for feature matching, object detection, object tracking, and other applications. Some conventional feature detection algorithms use computationally expensive and complex scale-space analysis schemes. This work aims to replace these schemes with a deep learning-based framework that can be used for a range of different feature detectors. This requires two steps: creating datasets with local image features together with their characteristic scales, and selecting, implementing, and testing appropriate convolutional neural network (CNN) architectures. The dataset will be created based on existing datasets for object recognition with several thousand real images. The features and the characteristic scales will be obtained using standard feature detectors such as SIFT, SURF, and ORB. For the CNN, the architectures from LIFT, LF-Net, and Key.Net will be considered. Using the framework, the estimated characteristic scales should be repeatable and precise.}
}

@mastersthesis{Siddiqui2023,
  author   = {Waleed Nauman Siddiqui},
  title    = {Spatiotemporal Clustering of Driving Situations Using Unsupervised Learning for Sensor Data Encoding},
  month    = {March},
  year     = {2023},
  school   = {Paderborn University},
  abstract = {Every driver has a habitual driving style, but these styles also vary under specific environmental conditions. To improve the functionality of advanced driver assistance systems, information about the surroundings is of utmost importance and can help to predict driving behavior under different circumstances. To better understand different environmental scenarios, this thesis aims to develop a system that can differentiate environmental scenes based on weather, traffic, or road conditions. Images and point clouds of different scenes are gathered from a vehicle driven by different drivers on the same route.
Using unsupervised manifold learning, the dimensionality of the raw sensor data is reduced to a 2D mapping, which is then clustered into representative driving situations. Not-Too-Deep (N2D) was chosen as the baseline system. An autoencoder from N2D is used to extract features from images; in addition, a beta-VAE is also used for image feature extraction. For point cloud feature extraction, FoldingNet, LRGM, and CenterPoint autoencoders are used. Features from images and point clouds are also fused to obtain feature points with richer environmental information. Principal component analysis (PCA) is first used to reduce the dimensionality of these features; afterwards, uniform manifold approximation and projection (UMAP) or pairwise controlled manifold approximation projection (PaCMAP) is used to reduce the dimensionality to two. Finally, hierarchical density-based spatial clustering of applications with noise (HDBSCAN) is used for clustering. For evaluation, internal and external validation criteria are used, and a comparison is made between clustering using image features, clustering using point cloud features, and clustering using fused features.}
}

@mastersthesis{Tondgaonkar2022,
  author   = {Pranav Pravin Tondgaonkar},
  title    = {Unsupervised Deep Learning-based Shape Retrieval Using Invariant Contour Features},
  month    = {November},
  year     = {2023},
  school   = {Paderborn University},
  type     = {Master's Thesis},
  abstract = {The objective of this work was to develop an unsupervised deep learning-based method for shape retrieval. The developed method uses specific scale- and rotation-invariant keypoints detected along the curvature extrema of object contours, which, according to information theory, are the most informative points. Each detected keypoint is described by a feature vector and carries the following geometric information: position, scale, and orientation. The similarity in shape between contour segments enclosed by these keypoints can pose challenges in distinguishing between objects. To address this, graphs are used to model the spatial arrangement of keypoints based on their geometric information and feature vectors. The development of the method involves three main steps: firstly, the creation of training data in the form of graphs using the geometric information and corresponding feature vectors of the keypoints; secondly, the selection, implementation, and testing of appropriate GNN architectures to learn shape representations; and thirdly, measuring the similarity between shapes using the learned representations to perform shape retrieval. The MPEG-7 dataset, which consists of 1400 shape samples in 70 distinct classes with 20 samples per class, is utilized in this work. The retrieval performance of the developed method is evaluated using a metric called the bull's eye score (BES). Primarily, two shape retrieval methods based on two unsupervised learning techniques were implemented and tested: GNN-based autoencoders and self-supervised graph contrastive learning. Specifically, a GNN-based autoencoder, namely the mod-GSAE, achieved an overall BES of 54.96 %. Furthermore, the retrieval results were systematically analyzed, and it was found that the representations become less distinct when the distribution of keypoints in the shape is uneven. The findings of this systematic analysis are presented in this work.}
}