% Chapter by de Lhoneux, Abend and Steedman in the edited volume
% "Representation and Parsing of Multiword Expressions" (Language Science Press, 2019).
% Typed as incollection because the chapter has its own authors and title while
% the containing book is identified by booktitle/editor; classic styles ignore
% booktitle on inbook entries.
% Acronyms in the title are braced so sentence-casing styles keep them uppercase.
@incollection{1cc57434ef6d4a0ea95e900727c75015,
  author    = {{de Lhoneux}, Miryam and Abend, Omri and Steedman, Mark},
  title     = {Investigating the effect of automatic {MWE} recognition on {CCG} parsing},
  booktitle = {Representation and Parsing of Multiword Expressions},
  editor    = {Parmentier, Yannick and Waszczuk, Jakub},
  series    = {Phraseology and Multiword Expressions},
  publisher = {Language Science Press},
  year      = {2019},
  month     = jun,
  day       = {1},
  pages     = {183--215},
  isbn      = {978-3-96110-146-7},
  doi       = {10.5281/zenodo.2579045},
  language  = {english},
  keywords  = {Multiword Expressions, MWEs, Combinatory Categorial Grammar, CCG, Deep parsing, linguistic theories and applications},
  abstract  = {The objective of this work is to find out whether or not information about Multiword Expressions (MWEs) can improve parsing with Combinatory Categorial Grammar (CCG). Inspired by studies that have shown the benefit of using information about MWEs for parsing, we transform the representation of some MWEs in CCGbank by collapsing them to one token. In contrast with those studies, we use information about MWEs obtained automatically, in order to find out if automatic MWE recognition can be used to help parsing. We look at two different effects that such an approach can lead to. Training on the transformed data can help improve parsing accuracy. We call this a training effect. Transformed data can help the parser in its decisions. We call this a parsing effect. Our model significantly outperforms the baseline model on the transformed gold standard, which indicates that there is a training effect. Our model performs significantly better on the transformed gold standard when the transformation is done before parsing as opposed to after parsing which indicates that there is a parsing effect. We show that these results can lead to improved performance on the non-transformed standard benchmark although we fail to show that it does so significantly. We conclude that despite the limited settings (our transformation algorithm is only able to deal with MWEs that do not cross constituent boundaries), there are noticeable improvements from using MWEs in parsing. We discuss ways in which the incorporation of MWEs into parsing can be improved and hypothesize that this will lead to more substantial results. We obtain different results with recognisers that detect different types of MWE and therefore emphasise the need to experiment with different recognisers. In this way, we can find out what types of MWEs this method is best suited to.},
}