@inproceedings{fuchss_lissa_2025,author={Fuchß, Dominik and Hey, Tobias and Keim, Jan and Liu, Haoyu and Ewald, Niklas and Thirolf, Tobias and Koziolek, Anne},year={2025},title={{LiSSA: Toward Generic Traceability Link Recovery through Retrieval-Augmented Generation}},booktitle={Proceedings of the IEEE/ACM 47th International Conference on Software Engineering},publisher={{Institute of Electrical and Electronics Engineers (IEEE)}},location={Ottawa, Canada},series={ICSE '25},}
ICSA
Enabling Architecture Traceability by LLM-based Architecture Component Name Extraction
@inproceedings{fuchss_enabling_2025,author={Fuchß, Dominik and Liu, Haoyu and Hey, Tobias and Keim, Jan and Koziolek, Anne},year={2025},title={{Enabling Architecture Traceability by LLM-based Architecture Component Name Extraction}},eventtitle={22nd IEEE International Conference on Software Architecture},eventtitleaddon={ICSA 2025},eventdate={2025-03-31/2025-04-04},venue={Odense, Denmark},booktitle={2025 IEEE 22nd International Conference on Software Architecture (ICSA)},publisher={{Institute of Electrical and Electronics Engineers (IEEE)}},keywords={Traceability Link Recovery, Large Language Models, Software Architecture, Model Extraction},language={english},}
REFSQ
Requirements Traceability Link Recovery via Retrieval-Augmented Generation
@inproceedings{hey_requirements_2025,author={Hey, Tobias and Fuchß, Dominik and Keim, Jan and Koziolek, Anne},year={2025},title={{Requirements Traceability Link Recovery via Retrieval-Augmented Generation}},booktitle={{Requirements Engineering: Foundation for Software Quality}},publisher={Springer},address={Cham},}
Structuring Scientific Knowledge in Software Engineering Using the Open Research Knowledge Graph
Angelika Kaplan, Fatma Chebbi, Dominik Fuchß, Oliver Karras, Tobias Hey, Anne Koziolek, and Ralf Reussner
@inproceedings{kaplan_structuring_2025,author={Kaplan, Angelika and Chebbi, Fatma and Fuchß, Dominik and Karras, Oliver and Hey, Tobias and Koziolek, Anne and Reussner, Ralf},title={{Structuring Scientific Knowledge in Software Engineering Using the Open Research Knowledge Graph}},booktitle={{Software Engineering 2025 – Companion Proceedings}},year={2025},publisher={Gesellschaft für Informatik, Bonn},doi={10.18420/SE2025-WS-29},keywords={Research Data Management, Software Engineering, Research Knowledge Graphs, Open Research Knowledge Graph, Semantic Modeling},language={en},}
2024
RE
Requirements Classification for Traceability Link Recovery
Being aware of and understanding the relations between the requirements of a software system and its other artifacts is crucial for its successful development, maintenance, and evolution. There are approaches to automatically recover this traceability information, but they fail to identify the actually relevant parts of the requirements. Recent large language model-based requirements classification approaches have been shown to identify aspects and concerns of requirements with promising accuracy. Therefore, we investigate the potential of those classification approaches for identifying irrelevant requirement parts for traceability link recovery between requirements and code. We train the large language model-based requirements classification approach NoRBERT on a new dataset of requirements and their entailed aspects and concerns. We use the results of the classification to filter irrelevant parts of the requirements before recovering trace links with the fine-grained word embedding-based FTLR approach. Two empirical studies show promising results regarding the quality of the classification and its impact on traceability link recovery. NoRBERT can identify functional and user-related aspects in the requirements with an F1-score of 84%. With the classification-based requirements filtering, the performance of FTLR improves significantly, and FTLR outperforms state-of-the-art unsupervised traceability link recovery approaches.
@inproceedings{hey_requirements_2024,title={{Requirements Classification for Traceability Link Recovery}},booktitle={{2024 IEEE 32nd International Requirements Engineering Conference (RE)}},author={Hey, Tobias and Keim, Jan and Corallo, Sophie},year={2024},pages={155-167},doi={10.1109/RE59067.2024.00024},}
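To make the filtering step more concrete, here is a minimal, hypothetical sketch of discarding requirement parts whose predicted class is irrelevant before trace link recovery. The `classify_part` function and the label names stand in for a fine-tuned classifier such as NoRBERT; they are not the paper's actual label set or code.

```python
# Hypothetical sketch: filter requirement parts by predicted class before TLR.
# classify_part is a placeholder for a fine-tuned classifier (e.g., NoRBERT);
# labels are illustrative, not the paper's label set.

RELEVANT_LABELS = {"Function", "Behavior", "Data", "User-related"}

def classify_part(part: str) -> str:
    """Placeholder for a learned classifier; returns a coarse aspect label."""
    return "Function" if any(w in part.lower() for w in ("shall", "must")) else "None"

def filter_requirement(requirement: str) -> list[str]:
    """Keep only sentence parts whose predicted aspect is considered relevant."""
    parts = [p.strip() for p in requirement.split(".") if p.strip()]
    return [p for p in parts if classify_part(p) in RELEVANT_LABELS]

if __name__ == "__main__":
    req = ("The system shall export reports as PDF. "
           "This feature was requested by the marketing team.")
    print(filter_requirement(req))  # only the functional part remains
```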
ICSE
Recovering Trace Links Between Software Documentation And Code
Introduction: Software development involves creating various artifacts at different levels of abstraction, and establishing relationships between them is essential. Traceability link recovery (TLR) automates this process, enhancing software quality by aiding tasks like maintenance and evolution. However, automating TLR is challenging due to semantic gaps resulting from the different levels of abstraction. While automated TLR approaches exist for requirements and code, architecture documentation lacks tailored solutions, hindering the preservation of architecture knowledge and design decisions. Methods: This paper presents our approach TransArC for TLR between architecture documentation and code, using component-based architecture models as intermediate artifacts to bridge the semantic gap. We create transitive trace links by combining the existing approach ArDoCo for linking architecture documentation to models with our novel approach ArCoTL for linking architecture models to code. Results: We evaluate our approaches with five open-source projects, comparing our results to baseline approaches. The model-to-code TLR approach achieves an average F1-score of 0.98, while the documentation-to-code TLR approach achieves a promising average F1-score of 0.82, significantly outperforming the baselines. Conclusion: Combining two specialized approaches with an intermediate artifact shows promise for bridging the semantic gap. In future research, we will explore further possibilities for such transitive approaches.
@inproceedings{keim_recovering_2024,title={Recovering {{Trace Links Between Software Documentation And Code}}},booktitle={2024 {{IEEE}}/{{ACM}} 46th {{International Conference}} on {{Software Engineering}} ({{ICSE}})},author={Keim, Jan and Corallo, Sophie and Fuch{\ss}, Dominik and Hey, Tobias and Telge, Tobias and Koziolek, Anne},year={2024},month=apr,pages={2655--2667},issn={1558-1225},doi={10.1145/3597503.3639130},urldate={2024-01-17},}
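The core of the transitive idea can be illustrated in a few lines: documentation-to-model links composed with model-to-code links yield documentation-to-code links. The link sets below are made up for the example; this is not the TransArC implementation.

```python
# Illustrative sketch of transitive linking: compose documentation-to-model links
# with model-to-code links to obtain documentation-to-code links.

from collections import defaultdict

doc_to_model = {("Sentence 3", "LogicComponent"), ("Sentence 7", "DatabaseAdapter")}
model_to_code = {("LogicComponent", "logic/Calculator.java"),
                 ("LogicComponent", "logic/RuleEngine.java"),
                 ("DatabaseAdapter", "persistence/Dao.java")}

def compose(links_ab, links_bc):
    """Transitive composition: (a, b) and (b, c) yield (a, c)."""
    by_source = defaultdict(set)
    for b, c in links_bc:
        by_source[b].add(c)
    return {(a, c) for a, b in links_ab for c in by_source.get(b, ())}

print(sorted(compose(doc_to_model, model_to_code)))
```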
2023
Dissertation
Automatische Wiederherstellung von Nachverfolgbarkeit zwischen Anforderungen und Quelltext
For the efficient development, maintenance and management of software systems, a comprehensive understanding of the relationships between software artifacts plays a crucial role. The traceability of these relationships makes it possible, for example, to comprehend past design decisions or to assess the impacts of changes. However, manually creating and maintaining traceability information entails high manual effort and therefore potentially high costs, as human expertise is usually required to understand the relationships. This is why this information is not available in most software projects. However, if traceability information between software artifacts could be generated automatically, the development, maintenance and management of a wide range of software systems could be made more efficient. Existing approaches to automatically recover trace links between requirements and source code are not able to bridge the semantic gap between artifacts. They achieve too low precision at acceptable recall levels to be used in practice.
The FTLR approach presented in this dissertation aims at improving automatic traceability link recovery between requirements and source code by performing a semantic similarity comparison. To this end, FTLR uses pre-trained fastText word embeddings to represent semantics. It leverages the structural information of requirements and source code by mapping at the level of their constituent parts instead of at the artifact level. This mapping uses the Word Mover’s Distance, which provides a semantic similarity comparison that is not skewed by aggregation. The actual identification of trace links is achieved by a majority vote over all fine-grained links of an artifact to determine the most prevalent aspects and ignore irrelevant relationships. In an experiment on six benchmark datasets, the use of the Word Mover’s Distance showed a significant improvement in identifying trace links over basic aggregated vector mappings. Similarly, the fine-grained mapping followed by an aggregation with majority vote showed significant improvements over a direct artifact-level mapping.
To further increase FTLR’s precision, an approach for filtering irrelevant parts of requirements is applied. The approach is based on a classification of the requirement elements using a language model-based classifier. Crucial for the application in FTLR is its applicability to unseen projects. The presented classifier NoRBERT uses transfer learning to fine-tune large pre-trained BERT language models for the classification of requirements. This way, NoRBERT is able to achieve promising results on unseen projects.
The approach was able to achieve a mapping quality of up to 89.8% in F1-score on unseen projects. Determining whether a requirement element contains no functional aspects allows irrelevant parts of the requirements to be filtered out before processing by FTLR. A comparison of FTLR’s performance with and without such a requirement element filter showed that a significant performance increase in F1-score can be achieved by filtering. FTLR achieves F1-scores of up to 55.5% and results in mean average precision of up to 59.6%.
In addition to representing semantics through word embeddings that are pre-trained exclusively on natural language text, this work also investigates bimodal language models for use in FTLR. These language models are pre-trained on large dual corpora consisting of source code methods and their natural language documentation and achieve promising results in related software engineering tasks, such as code search or bug localization. To investigate the applicability of these models to the automatic recovery of trace links between requirements and source code, two options for integrating the bimodal language model UniXcoder into FTLR were developed. In a comparison on five datasets for recovering trace links between requirements and source code, this type of model showed no increase in performance over the more lightweight word embeddings.
Finally, the performance of FTLR was compared to the performance of existing approaches for unsupervised automatic traceability link recovery between requirements and source code. FTLR achieves higher mean average precision and F1-scores than existing approaches on projects that only contain object-oriented source code. However, the results also illustrate that, especially on large projects, all existing approaches including FTLR are still far from achieving the quality that is needed to fully automate traceability link recovery in practice.
@phdthesis{hey_automatische_2023,title={{Automatische Wiederherstellung von Nachverfolgbarkeit zwischen Anforderungen und Quelltext}},author={Hey, Tobias},year={2023},doi={10.5445/IR/1000162446},langid={german},school={Karlsruhe Institute of Technology (KIT)},keywords={Traceability Link Recovery, Requirements Classification},}
2022
MSR4SA
A Taxonomy for Design Decisions in Software Architecture Documentation
A software system is the result of all design decisions that were made during development and maintenance. Documentation, such as software architecture documentation, captures a variety of different design decisions. Classifying the kinds of design decisions facilitates various downstream tasks by enabling more targeted analyses. In this paper, we propose a taxonomy for design decisions in software architecture documentation to primarily support consistency checking. Existing taxonomies of design decisions serve different purposes and do not fit well because they are too coarse-grained. We take an iterative approach, starting with an initial taxonomy based on literature and considerations regarding consistency checking. Then, we mine open-source repositories to extract 17 software architecture documentations that we use to refine the taxonomy. We evaluate the resulting taxonomy with regard to purpose, structure, and application. Additionally, we explore the automatic identification and classification of design decisions in software architecture documentation according to the taxonomy. We apply different machine learning techniques, such as Logistic Regression, Decision Trees, Random Forests, and BERT, to the 17 software architecture documentations. The evaluation yields an F1-score of up to 92.1% for identifying design decisions and an F1-score of up to 55.2% for classifying the kind of design decision.
@inproceedings{keim_taxonomy_2022,title={{A Taxonomy for Design Decisions in Software Architecture Documentation}},booktitle={{2nd International Workshop on Mining Software Repositories for Software Architecture}},author={Keim, Jan and Hey, Tobias and Sauer, Bjarne and Koziolek, Anne},year={2022},month=sep,doi={10.5445/IR/1000149966},urldate={2023-01-26},langid={english}}
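As an illustration of the simpler end of the techniques listed in the abstract, the following sketch trains a TF-IDF plus logistic regression classifier to flag design-decision sentences. Sentences and labels are toy data, not the 17 mined documentations.

```python
# A minimal sketch (not the paper's pipeline or data) of identifying design-decision
# sentences with a bag-of-words classifier: TF-IDF features + logistic regression.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

sentences = [
    "We use PostgreSQL as the primary data store.",
    "The project was started in 2019.",
    "All services communicate via REST over HTTPS.",
    "Thanks to all contributors for their feedback.",
]
is_decision = [1, 0, 1, 0]  # toy labels: 1 = design decision, 0 = other

clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)),
                    LogisticRegression(max_iter=1000))
clf.fit(sentences, is_decision)

print(clf.predict(["Messages are exchanged through a Kafka topic."]))
```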
2021
ICSME
Improving Traceability Link Recovery Using Fine-grained Requirements-to-Code Relations
Traceability information is a fundamental prerequisite for many essential software maintenance and evolution tasks, such as change impact and software reusability analyses. However, manually generating traceability information is costly and error-prone. Therefore, researchers have developed automated approaches that utilize textual similarities between artifacts to establish trace links. These approaches tend to achieve low precision at reasonable recall levels, as they are not able to bridge the semantic gap between high-level natural language requirements and code. We propose to overcome this limitation by leveraging fine-grained, method- and sentence-level similarities between the artifacts for traceability link recovery. Our approach uses word embeddings and a Word Mover’s Distance-based similarity to bridge the semantic gap. The fine-grained similarities are aggregated according to the artifacts’ structure and participate in a majority vote to retrieve coarse-grained, requirement-to-class trace links. In a comprehensive empirical evaluation, we show that our approach is able to outperform state-of-the-art unsupervised traceability link recovery approaches. Additionally, we illustrate the benefits of fine-grained structural analyses for word embedding-based trace link generation.
@inproceedings{hey_improving_2021,title={Improving {{Traceability Link Recovery Using Fine-grained Requirements-to-Code Relations}}},booktitle={2021 {{IEEE International Conference}} on {{Software Maintenance}} and {{Evolution}} ({{ICSME}})},author={Hey, Tobias and Chen, Fei and Weigelt, Sebastian and Tichy, Walter F.},date={2021-09},pages={12--22},issn={2576-3148},doi={10.1109/ICSME52107.2021.00008},eventtitle={2021 {{IEEE International Conference}} on {{Software Maintenance}} and {{Evolution}} ({{ICSME}})},}
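The following sketch illustrates the fine-grained matching and voting idea with gensim's Word Mover's Distance. It is not the FTLR implementation: it swaps in a small GloVe model from gensim's downloader instead of fastText, uses made-up requirement and method texts, and a simplified vote rule.

```python
# Rough sketch of fine-grained requirement-to-code matching via Word Mover's Distance:
# each requirement sentence votes for the class owning its best-matching method.
# Recent gensim versions may additionally require the POT package for wmdistance().

from collections import Counter
import gensim.downloader as api

kv = api.load("glove-wiki-gigaword-50")  # small word-vector model for the example

requirement_sentences = [
    "the user logs in with name and password",
    "the account is locked after three failed attempts",
]
methods = {  # method identifier -> preprocessed method text (toy data)
    "AuthService.login": "check user name password credentials",
    "AuthService.lockAccount": "lock account failed attempts counter",
    "ReportWriter.render": "render pdf report table",
}

votes = Counter()
for sentence in requirement_sentences:
    best = min(methods, key=lambda m: kv.wmdistance(sentence.split(), methods[m].split()))
    votes[best.split(".")[0]] += 1  # vote for the class owning the best-matching method

# simplified majority vote (the actual aggregation in FTLR is more involved)
links = [cls for cls, n in votes.items() if n > len(requirement_sentences) / 2]
print(votes, links)
```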
AIRE
Knowledge-Based Sense Disambiguation of Multiword Expressions in Requirements Documents
Understanding the meaning and the senses of expressions is essential to analyze natural language requirements. Disambiguation of expressions in their context is needed to prevent misinterpretations. Current knowledge-based disambiguation approaches only focus on senses of single words and miss out on linking the shared meaning of expressions consisting of multiple words. As these expressions are common in requirements, we propose a sense disambiguation approach that is able to detect and disambiguate multiword expressions. We use a two-tiered approach so that different techniques can be used for detection and disambiguation. Initially, a conditional random field detects multiword expressions. Afterwards, the approach disambiguates these expressions and retrieves the corresponding senses using a knowledge-based approach. The knowledge-based approach has the benefit that only the knowledge base has to be exchanged to adapt the approach to new domains and knowledge. Our approach is able to detect multiword expressions with an F1-score of 88.4% in an evaluation on 997 requirement sentences. The sense disambiguation achieves an F1-score of up to 57%.
@inproceedings{hey_knowledgebased_2021,title={Knowledge-Based {{Sense Disambiguation}} of {{Multiword Expressions}} in {{Requirements Documents}}},booktitle={2021 {{IEEE}} 29th {{International Requirements Engineering Conference Workshops}} ({{REW}})},author={Hey, Tobias and Keim, Jan and Tichy, Walter F.},date={2021-09},pages={70--76},doi={10.1109/REW53955.2021.00017},eventtitle={2021 {{IEEE}} 29th {{International Requirements Engineering Conference Workshops}} ({{REW}})}}
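The detection stage can be pictured as BIO tagging with a linear-chain CRF. The toy sketch below uses sklearn-crfsuite with made-up sentences, labels, and features; the paper's actual feature set and training data differ.

```python
# Toy sketch of BIO-style multiword-expression detection with a linear-chain CRF.

import sklearn_crfsuite

def features(tokens, i):
    t = tokens[i]
    return {
        "lower": t.lower(),
        "is_title": t.istitle(),
        "prev": tokens[i - 1].lower() if i > 0 else "<s>",
        "next": tokens[i + 1].lower() if i < len(tokens) - 1 else "</s>",
    }

sentences = [
    "The system shall provide a log in screen".split(),
    "Users can carry out a password reset".split(),
]
labels = [
    ["O", "O", "O", "O", "O", "B-MWE", "I-MWE", "O"],
    ["O", "O", "B-MWE", "I-MWE", "O", "B-MWE", "I-MWE"],
]

X = [[features(s, i) for i in range(len(s))] for s in sentences]
crf = sklearn_crfsuite.CRF(algorithm="lbfgs", max_iterations=50)
crf.fit(X, labels)

test = "Please log in before the download".split()
print(crf.predict([[features(test, i) for i in range(len(test))]]))
```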
2020
RE
NoRBERT: Transfer Learning for Requirements Classification
Classifying requirements is crucial for automatically handling natural language requirements. The performance of existing automatic classification approaches diminishes when applied to unseen projects because requirements usually vary in wording and style. The main problem is poor generalization. We propose NoRBERT, which fine-tunes BERT, a language model that has proven useful for transfer learning. We apply our approach to different tasks in the domain of requirements classification. We achieve similar or better results (F1-scores of up to 94%) on both seen and unseen projects for classifying functional and non-functional requirements on the PROMISE NFR dataset. NoRBERT outperforms recent approaches at classifying non-functional requirements subclasses. The most frequent classes are classified with an average F1-score of 87%. In an unseen-project setup on a relabeled PROMISE NFR dataset, our approach achieves an improvement of 15 percentage points in average F1-score compared to recent approaches. Additionally, we propose to classify functional requirements according to the included concerns, i.e., function, data, and behavior. We labeled the functional requirements in the PROMISE NFR dataset and applied our approach. NoRBERT achieves an F1-score of up to 92%. Overall, NoRBERT improves requirements classification and can be applied to unseen projects with convincing results.
@inproceedings{hey_norbert_2020,title={{{NoRBERT}}: {{Transfer Learning}} for {{Requirements Classification}}},shorttitle={{{NoRBERT}}},booktitle={2020 {{IEEE}} 28th {{International Requirements Engineering Conference}} ({{RE}})},author={Hey, Tobias and Keim, Jan and Koziolek, Anne and Tichy, Walter F.},date={2020-08},pages={169--179},issn={2332-6441},doi={10.1109/RE48521.2020.00028},eventtitle={2020 {{IEEE}} 28th {{International Requirements Engineering Conference}} ({{RE}})},}
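The transfer-learning core, fine-tuning a pre-trained BERT checkpoint for requirements classification, can be sketched in a few lines with Hugging Face Transformers. Model choice, labels, data, and hyperparameters below are placeholders, not NoRBERT's actual configuration.

```python
# Condensed sketch of the transfer-learning idea (not NoRBERT itself): fine-tune a
# pretrained BERT checkpoint for binary functional/non-functional classification.

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased",
                                                           num_labels=2)

texts = ["The system shall encrypt all stored passwords.",
         "The user interface should feel responsive at all times."]
labels = torch.tensor([1, 0])  # 1 = functional, 0 = non-functional (toy labels)

batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

model.train()
for _ in range(3):  # a few toy epochs on the tiny batch
    out = model(**batch, labels=labels)
    out.loss.backward()
    optimizer.step()
    optimizer.zero_grad()

model.eval()
with torch.no_grad():
    logits = model(**tokenizer(["Reports are exported as PDF."],
                               return_tensors="pt")).logits
print(logits.argmax(dim=-1))
```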
ACL
Programming in Natural Language with fuSE: Synthesizing Methods from Spoken Utterances Using Deep Natural Language Understanding
The key to effortless end-user programming is natural language. We examine how to teach intelligent systems new functions, expressed in natural language. As a first step, we collected 3168 samples of teaching efforts in plain English. Then we built fuSE, a novel system that translates English function descriptions into code. Our approach is three-tiered and each task is evaluated separately. We first classify whether an intent to teach new functionality is present in the utterance (accuracy: 97.7% using BERT). Then we analyze the linguistic structure and construct a semantic model (accuracy: 97.6% using a BiLSTM). Finally, we synthesize the signature of the method, map the intermediate steps (instructions in the method body) to API calls and inject control structures (F1: 67.0% with information retrieval and knowledge-based methods). In an end-to-end evaluation on an unseen dataset fuSE synthesized 84.6% of the method signatures and 79.2% of the API calls correctly.
@inproceedings{weigelt_programming_2020,title={Programming in {{Natural Language}} with {{fuSE}}: {{Synthesizing Methods}} from {{Spoken Utterances Using Deep Natural Language Understanding}}},shorttitle={Programming in {{Natural Language}} with {{fuSE}}},booktitle={Proceedings of the 58th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}}},author={Weigelt, Sebastian and Steurer, Vanessa and Hey, Tobias and Tichy, Walter F.},date={2020-07},pages={4280--4295},publisher={{Association for Computational Linguistics}},location={{Online}},doi={10.18653/v1/2020.acl-main.395},url={https://www.aclweb.org/anthology/2020.acl-main.395},urldate={2020-07-15},eventtitle={{{ACL}} 2020}}
IJSC
Towards Programming in Natural Language: Learning New Functions from Spoken Utterances
Systems with conversational interfaces are rather popular nowadays. However, their full potential is not yet exploited. For the time being, users are restricted to calling predefined functions. Soon, users will expect to customize systems to their needs and create their own functions using nothing but spoken instructions. Thus, future systems must understand how laypersons teach new functionality to intelligent systems. The understanding of natural language teaching sequences is a first step toward comprehensive end-user programming in natural language. We propose to analyze the semantics of spoken teaching sequences with a hierarchical classification approach. First, we classify whether an utterance constitutes an effort to teach a new function or not. Afterward, a second classifier locates the distinct semantic parts of teaching efforts: declaration of a new function, specification of intermediate steps, and superfluous information. For both tasks we implement a broad range of machine learning techniques: classical approaches, such as Naïve Bayes, and neural network configurations of various types and architectures, such as bidirectional LSTMs. Additionally, we introduce two heuristic-based adaptations that are tailored to the task of understanding teaching sequences. As a data basis, we use 3168 descriptions gathered in a user study. For the first task, convolutional neural networks obtain the best results (accuracy: 96.6%); bidirectional LSTMs excel in the second (accuracy: 98.8%). The adaptations improve the first-level classification considerably (plus 2.2 percentage points).
@article{weigelt_programming_2020a,title={Towards {{Programming}} in {{Natural Language}}: {{Learning New Functions}} from {{Spoken Utterances}}},shorttitle={Towards {{Programming}} in {{Natural Language}}},author={Weigelt, Sebastian and Steurer, Vanessa and Hey, Tobias and Tichy, Walter F.},date={2020-06},journal={International Journal on Semantic Computing},volume={14},number={02},pages={249--272},publisher={{World Scientific Publishing Co.}},issn={1793-351X},doi={10.1142/S1793351X20400097},url={https://www.worldscientific.com/doi/abs/10.1142/S1793351X20400097},urldate={2022-03-18},}
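To make the second-stage classifier more tangible, here is a bare-bones BiLSTM tagger in PyTorch that assigns one of the three semantic parts to each token of a teaching utterance. Data, vocabulary, and sizes are toy values; the study's actual architecture and preprocessing differ.

```python
# Bare-bones sketch of a bidirectional LSTM that labels each token of a teaching
# utterance as declaration, specification, or superfluous (toy setup).

import torch
import torch.nn as nn

class BiLSTMTagger(nn.Module):
    def __init__(self, vocab=1000, emb=64, hidden=32, n_classes=3):
        super().__init__()
        self.emb = nn.Embedding(vocab, emb)
        self.lstm = nn.LSTM(emb, hidden, batch_first=True, bidirectional=True)
        self.out = nn.Linear(2 * hidden, n_classes)

    def forward(self, token_ids):
        h, _ = self.lstm(self.emb(token_ids))
        return self.out(h)  # (batch, seq_len, n_classes)

model = BiLSTMTagger()
x = torch.randint(1, 1000, (8, 20))  # toy token ids
y = torch.randint(0, 3, (8, 20))     # one toy label per token
loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

for _ in range(3):
    logits = model(x)
    loss = loss_fn(logits.reshape(-1, 3), y.reshape(-1))
    opt.zero_grad()
    loss.backward()
    opt.step()

print(model(x[:1]).argmax(dim=-1).shape)  # (1, 20): predicted label per token
```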
ICSC
Roger That! Learning How Laypersons Teach New Functions to Intelligent Systems
Intelligent systems are rather smart today but still limited to built-in functionality. To break through this barrier, future systems must allow users to easily adapt the system by themselves. For humans the most natural way to communicate is talking. But what if users want to extend the systems’ functionality with nothing but natural language? Then intelligent systems must understand how laypersons teach new skills. To grasp the semantics of such teaching sequences, we have defined a hierarchical classification task. On the first level, we consider the existence of a teaching intent in an utterance; on the second, we classify the distinct semantic parts of teaching sequences: declaration of a new function, specification of intermediate steps, and superfluous information. We evaluate twelve machine learning techniques with multiple configurations tailored to this task ranging from classical approaches such as naïve Bayes to modern techniques such as bidirectional LSTMs and task-oriented adaptations. On the first level convolutional neural networks achieve the best accuracy (96.6%). For the second task, bidirectional LSTMs are the most accurate (98.8%). With the additional adaptations we are able to improve both classifications distinctly (up to 1.8%).
@inproceedings{weigelt_roger_2020,title={Roger That! {{Learning How Laypersons Teach New Functions}} to {{Intelligent Systems}}},booktitle={2020 {{IEEE}} 14th {{International Conference}} on {{Semantic Computing}} ({{ICSC}})},author={Weigelt, Sebastian and Steurer, Vanessa and Hey, Tobias and Tichy, Walter F.},date={2020-02},pages={93--100},issn={2325-6516},doi={10.1109/ICSC.2020.00020},eventtitle={2020 {{IEEE}} 14th {{International Conference}} on {{Semantic Computing}} ({{ICSC}})},keywords={Intelligent systems,Machine learning,Neural networks,Semantics,Task analysis,Training}}
IJHCC
What’s the Matter? Knowledge Acquisition by Unsupervised Multi-Topic Labeling for Spoken Utterances
@article{weigelt_what_2020,title={What’s the {{Matter}}? {{Knowledge Acquisition}} by {{Unsupervised Multi-Topic Labeling}} for {{Spoken Utterances}}},author={Weigelt, Sebastian and Keim, Jan and Hey, Tobias and Tichy, Walter F.},date={2020},journal={International Journal of Humanized Computing and Communication},volume={1},number={1},pages={43--66},publisher={{Institute for Semantic Computing Foundation}},issn={2641-953X},doi={10.35708/HCC1868-126364},langid={english},}
2019
ICSE
INDIRECT: Intent-Driven Requirements-to-Code Traceability
Traceability information is important for software maintenance, change impact analysis, software reusability, and other software engineering tasks. However, manually generating this information is costly. State-of-the-art automation approaches suffer from their imprecision and domain dependence. I propose INDIRECT, an intent-driven approach to automated requirements-to-code traceability. It combines natural language understanding and program analysis to generate intent models for both requirements and source code. Then INDIRECT learns a mapping between the two intent models. I expect that using the two intent models as the basis for the mapping yields a more precise and more general approach. The intent models contain information such as the semantics of the statements, underlying concepts, and relations between them. The generation of the requirements intent model is divided into smaller subtasks by using iterative natural language understanding. Likewise, the intent model for source code is built iteratively by identifying and understanding semantically related source code chunks.
@inproceedings{hey_indirect_2019,title={{{INDIRECT}}: {{Intent-Driven Requirements-to-Code Traceability}}},shorttitle={{{INDIRECT}}},booktitle={2019 {{IEEE}}/{{ACM}} 41st {{International Conference}} on {{Software Engineering}}: {{Companion Proceedings}} ({{ICSE-Companion}})},author={Hey, Tobias},date={2019-05},pages={190--191},doi={10.1109/ICSE-Companion.2019.00078},eventtitle={2019 {{IEEE}}/{{ACM}} 41st {{International Conference}} on {{Software Engineering}}: {{Companion Proceedings}} ({{ICSE-Companion}})},keywords={Natural Language Understanding,Program Analysis,Requirements Traceability,Traceability Link Recovery}}
HCC
Unsupervised Multi-Topic Labeling for Spoken Utterances
Systems such as Alexa, Cortana, and Siri appear rather smart. However, they only react to predefined wordings and do not actually grasp the user’s intent. To overcome this limitation, a system must grasp the topics the user is talking about. Therefore, we apply unsupervised multi-topic labeling to spoken utterances. Although topic labeling is a well-studied task on textual documents, its potential for spoken input is almost unexplored. Our approach for topic labeling is tailored to spoken utterances; it copes with short and ungrammatical input. The approach is two-tiered. First, we disambiguate word senses. We utilize Wikipedia as pre-labeled corpus to train a naïve Bayes classifier. Second, we build topic graphs based on DBpedia relations. We use two strategies to determine central terms in the graphs, i.e., the shared topics. One focuses on the dominant senses in the utterance and the other covers as many distinct senses as possible. Our approach creates multiple distinct topics per utterance and ranks results. The evaluation shows that the approach is feasible; the word sense disambiguation achieves a recall of 0.799. Concerning topic labeling, in a user study subjects assessed that in 90.9% of the cases at least one proposed topic label among the first four is a good fit. With regard to precision, the subjects judged that 77.2% of the top ranked labels are a good fit or good but somewhat too broad (Fleiss’ kappa k = 0.27).
@inproceedings{weigelt_unsupervised_2019,title={Unsupervised {{Multi-Topic Labeling}} for {{Spoken Utterances}}},booktitle={2019 {{IEEE International Conference}} on {{Humanized Computing}} and {{Communication}} ({{HCC}})},author={Weigelt, Sebastian and Keim, Jan and Hey, Tobias and Tichy, Walter F.},date={2019-09},pages={38--45},doi={10.1109/HCC46620.2019.00014},eventtitle={2019 {{IEEE International Conference}} on {{Humanized Computing}} and {{Communication}} ({{HCC}})}}
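The topic-graph step can be illustrated with networkx: connect the disambiguated senses via semantic relations and rank candidate labels by centrality. The nodes and edges below are hand-made stand-ins for relations the approach would retrieve from DBpedia, and degree centrality is only one simple stand-in for the two ranking strategies.

```python
# Simplified illustration of the topic-graph idea: build a graph over word senses
# and pick central nodes as shared topic labels.

import networkx as nx

senses = ["Coffee", "Espresso", "Kitchen", "Grinder"]       # disambiguated senses
relations = [("Espresso", "Coffee"), ("Grinder", "Coffee"),  # hand-made "DBpedia" edges
             ("Coffee", "Drink"), ("Kitchen", "Room"), ("Grinder", "Kitchen")]

g = nx.Graph()
g.add_nodes_from(senses)
g.add_edges_from(relations)

# rank candidate topic labels by how central they are to the utterance's senses
ranking = sorted(nx.degree_centrality(g).items(), key=lambda kv: kv[1], reverse=True)
print(ranking[:3])  # e.g., "Coffee" comes out as the dominant shared topic
```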
2018
ICSC
Detection of Conditionals in Spoken Utterances
State-of-the-art intelligent assistant systems such as Siri & Co. struggle with conditionals. They reliably react to ordinary commands. However, their architectures are not designed to cope with complex conditional queries. We propose a system to overcome these limitations. Our approach models if-then-else constructs in spoken utterances explicitly. The model bridges the gap between linguistic and programmatic semantics. To prove our concept, we apply a rule-based approach to extract conditionals. For our prototype we use part-of-speech and chunk tags provided by NLP tools. We make use of coreference information to determine the reference frame of a condition. The explicit modeling of conditionals allows us to evaluate the accuracy of our approach independently from other language understanding tasks. The prototype works well in the domain of humanoid robotics. In a user study we achieve F1 scores from 0.783 (automatic speech recognition) up to 0.898 (manual transcripts) on unrestricted utterances.
@inproceedings{weigelt_detection_2018,ids={weigeltDetection2018},title={Detection of {{Conditionals}} in {{Spoken Utterances}}},booktitle={2018 {{IEEE}} 12th {{International Conference}} on {{Semantic Computing}} ({{ICSC}})},author={Weigelt, Sebastian and Hey, Tobias and Steurer, Vanessa},date={2018-01},pages={85--92},doi={10.1109/ICSC.2018.00021},eventtitle={2018 {{IEEE}} 12th {{International Conference}} on {{Semantic Computing}} ({{ICSC}})}}
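As a crude stand-in for the rule-based extraction, the following sketch segments an utterance into condition, then-block, and else-block using plain keyword matching. The actual approach works on part-of-speech and chunk tags and uses coreference information rather than string patterns.

```python
# Keyword-only toy sketch: split an utterance into condition / then / else parts.

import re

def extract_conditional(utterance: str):
    m = re.search(r"\bif\b(.+?)\bthen\b(.+?)(?:\belse\b(.+))?$",
                  utterance, re.IGNORECASE)
    if not m:
        return None
    condition, then_part, else_part = (g.strip() if g else None for g in m.groups())
    return {"condition": condition, "then": then_part, "else": else_part}

print(extract_conditional(
    "if the cup is empty then refill it else put it into the dishwasher"))
```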
IJSC
Detection of Control Structures in Spoken Utterances
State-of-the-art intelligent assistant systems such as Siri and Cortana do not consider control structures in the user input. They reliably react to ordinary commands. However, their architectures are not designed to cope with queries that require complex control flow structuring. We propose a system to overcome these limitations. Our approach models if-then-else, loop, and concurrency constructs in spoken utterances explicitly. The model bridges the gap between linguistic and programmatic semantics. To demonstrate our concept, we apply a rule-based approach. We have implemented three prototypes that use keyphrases to discover potential control structures depending on the type of control structure. However, the full structures are determined differently. For conditionals we use chunk and part-of-speech (POS) tags provided by natural language processing tools; for loops and concurrency we make use of an action extraction approach based on semantic role labeling (SRL). Additionally, we use coreference information to determine the extent of the respective structure. The explicit modeling of conditionals, loops, and concurrent sections allows us to evaluate the accuracy of our approaches independently from each other and from other language understanding tasks. We have conducted two user studies in the domain of humanoid robotics. The first focused on conditionals. Our prototype achieves F1 scores from 0.783 (automatic speech recognition) to 0.898 (manual transcripts) on unrestricted utterances. In the second, the prototypes for loop and concurrency detection also proved useful. F1 scores range from 0.588 (automatic speech recognition) to 0.814 (manual transcripts) for loops and from 0.622 (automatic speech recognition) to 0.842 (manual transcripts) for concurrent sections respectively.
@article{weigelt_detection_2018a,title={Detection of {{Control Structures}} in {{Spoken Utterances}}},author={Weigelt, Sebastian and Hey, Tobias and Steurer, Vanessa},date={2018-09-01},journal={International Journal on Semantic Computing},volume={12},number={03},pages={335--360},publisher={{World Scientific Publishing Co.}},issn={1793-351X},doi={10.1142/S1793351X18400159},url={https://www.worldscientific.com/doi/abs/10.1142/S1793351X18400159},urldate={2020-03-26},}
RAISE
Integrating a Dialog Component into a Framework for Spoken Language Understanding
Spoken language interfaces are the latest trend in human-computer interaction. Users enjoy the newly found freedom, but developers face an unfamiliar and daunting task. Creating reactive spoken language interfaces requires skills in natural language processing. We show how a developer can integrate a dialog component into a natural language processing system by means of software engineering methods. Our research project PARSE, which aims at naturalistic end-user programming in spoken natural language, serves as an example. We integrate a dialog component with PARSE without affecting its other components: We modularize the dialog management and introduce dialog acts that bundle a trigger for the dialog and the reaction of the system. We implemented three dialog acts to address the following issues: speech recognition uncertainties, coreference ambiguities, and incomplete conditionals. We conducted a user study with ten subjects to evaluate our approach. The dialog component achieved resolution rates from 23% to 50% (depending on the dialog act) and introduced a negligible number of errors. We expect the overall performance to increase even further with the implementation of additional dialog acts.
@inproceedings{weigelt_integrating_2018,title={Integrating a {{Dialog Component}} into a {{Framework}} for {{Spoken Language Understanding}}},booktitle={Proceedings of the 6th {{International Workshop}} on {{Realizing Artificial Intelligence Synergies}} in {{Software Engineering}}},author={Weigelt, Sebastian and Hey, Tobias and Landhäußer, Mathias},date={2018},series={{{RAISE}} '18},pages={1--7},publisher={{ACM}},location={{New York, NY, USA}},doi={10.1145/3194104.3194105},url={http://doi.acm.org/10.1145/3194104.3194105},urldate={2018-10-26},}
2017
SEKE
Context Model Acquisition from Spoken Utterances
Current systems with spoken language interfaces do not leverage contextual information. Therefore, they struggle with understanding speakers’ intentions. We propose a system that creates a context model from user utterances to overcome this lack of information. It comprises eight types of contextual information organized in three layers: individual, conceptual, and hierarchical. We have implemented our approach as a part of the project PARSE. It aims at enabling laypersons to construct simple programs by dialog. Our implementation incrementally generates context including occurring entities and actions as well as their conceptualizations, state transitions, and other types of contextual information. Its analyses are knowledge- or rule-based (depending on the context type), but we make use of many well-known probabilistic NLP techniques. In a user study we have shown the feasibility of our approach, achieving F1 scores from 72% up to 98% depending on the type of contextual information. The context model enables us to resolve complex identity relations. However, quantifying this effect is subject to future work. Likewise, we plan to investigate whether our context model is useful for other language understanding tasks, e.g., anaphora resolution, topic analysis, or correction of automatic speech recognition errors.
@inproceedings{weigelt_context_2017,title={Context {{Model Acquisition}} from {{Spoken Utterances}}},booktitle={Proceedings of {{The}} 29th {{International Conference}} on {{Software Engineering}} \& {{Knowledge Engineering}}},author={Weigelt, Sebastian and Hey, Tobias and Tichy, Walter F.},date={2017-07-05},pages={201--206},location={{Pittsburgh, PA}},doi={10.18293/SEKE2017-083},eventtitle={{{SEKE}} 2017},langid={english}}
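A context model of this kind can be pictured as a layered data structure. The dataclass below is only a schematic illustration with paraphrased type and layer names; it is not the schema used in PARSE and does not cover all eight context types.

```python
# Schematic sketch of a layered context model (illustrative names, not PARSE's schema).

from dataclasses import dataclass, field

@dataclass
class ContextModel:
    # individual layer: what occurs in the utterance itself
    entities: list[str] = field(default_factory=list)
    actions: list[str] = field(default_factory=list)
    # conceptual layer: abstractions over the individual occurrences
    concepts: dict[str, str] = field(default_factory=dict)  # entity -> concept
    state_transitions: list[tuple[str, str, str]] = field(default_factory=list)
    # hierarchical layer: relations between the elements above
    identity_relations: list[tuple[str, str]] = field(default_factory=list)

ctx = ContextModel(entities=["the cup", "it"], actions=["take", "refill"])
ctx.concepts["the cup"] = "Container"
ctx.identity_relations.append(("it", "the cup"))  # resolve "it" to "the cup"
print(ctx)
```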
IJSEKE
Context Model Acquisition from Spoken Utterances
Current systems with spoken language interfaces do not leverage contextual information. Therefore, they struggle with understanding speakers’ intentions. We propose a system that creates a context model from user utterances to overcome this lack of information. It comprises eight types of contextual information organized in three layers: individual, conceptual, and hierarchical. We have implemented our approach as a part of the project PARSE. It aims at enabling laypersons to construct simple programs by dialog. Our implementation incrementally generates context including occurring entities and actions as well as their conceptualizations, state transitions, and other types of contextual information. Its analyses are knowledge- or rule-based (depending on the context type), but we make use of many well-known probabilistic NLP techniques. In a user study we have shown the feasibility of our approach, achieving F1 scores from 72% up to 98% depending on the type of contextual information. The context model enables us to resolve complex identity relations. However, quantifying this effect is subject to future work. Likewise, we plan to investigate whether our context model is useful for other language understanding tasks, e.g., anaphora resolution, topic analysis, or correction of automatic speech recognition errors.
@article{weigelt_context_2017a,title={Context {{Model Acquisition}} from {{Spoken Utterances}}},author={Weigelt, Sebastian and Hey, Tobias and Tichy, Walter F.},date={2017-11-01},journaltitle={International Journal on Software Engineering and Knowledge Engineering},volume={27},pages={1439--1453},issn={0218-1940},doi={10.1142/S0218194017400058},url={https://www.worldscientific.com/doi/abs/10.1142/S0218194017400058},urldate={2018-07-03},issue={09n10}}
2014
RAISE
Deriving Time Lines from Texts
We investigate natural language as an alternative to programming languages. Natural language would empower anyone to program with minimal training. In this paper, we solve an ordering problem that arises in natural-language programming. An empirical study showed that users do not always provide the strict sequential order of steps needed for execution on a computer. Instead, temporal expressions involving "before", "after", "while", "at the end", and others are used to indicate an order other than the textual one. We present an analysis that extracts the intended time line by exploiting temporal clues. The technique is analyzed in the context of Alice, a 3D programming environment, and AliceNLP, a system for programming Alice in ordinary English. Extracting temporal order could also be useful for analyzing reports, question answering, help desk requests, and big data applications.
@inproceedings{landhausser_deriving_2014,title={Deriving {{Time Lines}} from {{Texts}}},booktitle={Proceedings of the 3rd {{International Workshop}} on {{Realizing Artificial Intelligence Synergies}} in {{Software Engineering}}},author={Landhäußer, Mathias and Hey, Tobias and Tichy, Walter F.},date={2014},series={{{RAISE}} 2014},pages={45--51},publisher={{ACM}},location={{New York, NY, USA}},doi={10.1145/2593801.2593809},url={http://doi.acm.org/10.1145/2593801.2593809},urldate={2018-10-26},isbn={978-1-4503-2846-3},keywords={Alice,end-user programming,Natural language processing,programming with natural language,temporal expressions,temporal reasoning,time lines}}
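The ordering problem can be illustrated with a toy function that turns textual order plus temporal cue words into the intended execution order. The real analysis works on parsed text and handles far more cue types and attachment cases than this keyword version.

```python
# Toy sketch: reorder actions given in textual order according to temporal cue words.

def execution_order(steps):
    """steps: list of (cue, action) in textual order; returns actions in intended order."""
    ordered, tail = [], []
    for cue, action in steps:
        if cue == "before":        # "before that, ...": precedes the previous action
            ordered.insert(max(len(ordered) - 1, 0), action)
        elif cue == "at the end":  # deferred until everything else is done
            tail.append(action)
        else:                      # plain textual order ("then", "after that", none)
            ordered.append(action)
    return ordered + tail

steps = [(None, "put the cake into the oven"),
         ("before", "preheat the oven"),       # mentioned second, but must happen first
         (None, "wait thirty minutes"),
         ("at the end", "switch the oven off")]
print(execution_order(steps))
```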