
Publications
At ELSA, we aim to inspire and share knowledge within our network and beyond. The collection of publications below provides an overview of both the network’s own output and research we support. Labels distinguish the categories.
Please note that this list makes no claim to completeness. If you have published a paper that is related to ELSA and should be listed, please reach out to our Press and Communications team.
Scano, Christian; Floris, Giuseppe; Montaruli, Biagio; Demetrio, Luca; Valenza, Andrea; Compagna, Luca; Ariu, Davide; Piras, Luca; Balzarotti, Davide; Biggio, Battista
ModSec-Learn: Boosting ModSecurity with Machine Learning Proceedings Article
In: Mehmood, Rashid; Hernández, Guillermo; Praça, Isabel; Wikarek, Jaroslaw; Loukanova, Roussanka; dos Reis, Arsénio Monteiro; Skarmeta, Antonio; Lombardi, Eleonora (Ed.): Distributed Computing and Artificial Intelligence, Special Sessions I, 21st International Conference.
@inproceedings{10.1007/978-3-031-76459-2_3,
title = {ModSec-Learn: Boosting ModSecurity with Machine Learning},
author = {Christian Scano and Giuseppe Floris and Biagio Montaruli and Luca Demetrio and Andrea Valenza and Luca Compagna and Davide Ariu and Luca Piras and Davide Balzarotti and Battista Biggio},
editor = {Rashid Mehmood and Guillermo Hernández and Isabel Praça and Jaroslaw Wikarek and Roussanka Loukanova and Arsénio Monteiro dos Reis and Antonio Skarmeta and Eleonora Lombardi},
booktitle = {Distributed Computing and Artificial Intelligence, Special Sessions I, 21st International Conference},
abstract = {"ModSecurity is widely recognized as the standard open-source Web Application Firewall (WAF), maintained by the OWASP Foundation. It detects malicious requests by matching them against the Core Rule Set (CRS), identifying well-known attack patterns. Each rule is manually assigned a weight based on the severity of the corresponding attack, and a request is blocked if the sum of the weights of matched rules exceeds a given threshold. However, we argue that this strategy is largely ineffective against web attacks, as detection is only based on heuristics and not customized on the application to protect. In this work, we overcome this issue by proposing a machine-learning model that uses the CRS rules as input features. Through training, ModSec-Learn is able to tune the contribution of each CRS rule to predictions, thus adapting the severity level to the web applications to protect. Our experiments show that ModSec-Learn achieves a significantly better trade-off between detection and false positive rates. Finally, we analyze how sparse regularization can reduce the number of rules that are relevant at inference time, by discarding more than 30% of the CRS rules. We release our open-source code and the dataset at https://github.com/pralab/modsec-learnand https://github.com/pralab/http-traffic-dataset, respectively."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
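The idea above lends itself to a compact illustration: treat each CRS rule as a binary feature and learn per-rule weights with a sparse linear model. Below is a minimal Python sketch, assuming a toy feature matrix in place of real ModSecurity rule matches; it illustrates the approach, not the authors' released code (see the linked repositories for that).

# Sketch: ModSec-Learn-style scoring, assuming X[i, j] = 1 if CRS rule j
# matched request i, and y[i] = 1 for malicious requests (toy data here).
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n_requests, n_rules = 1000, 200
X = rng.integers(0, 2, size=(n_requests, n_rules)).astype(float)  # toy rule matches
y = rng.integers(0, 2, size=n_requests)                           # toy labels

# L1 (sparse) regularization drives the weights of irrelevant rules to zero,
# mirroring the paper's observation that >30% of CRS rules can be discarded.
clf = LogisticRegression(penalty="l1", solver="liblinear", C=0.1)
clf.fit(X, y)

active_rules = np.flatnonzero(clf.coef_[0])
print(f"rules kept at inference time: {len(active_rules)}/{n_rules}")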
Franco, Danilo; Oneto, Luca; Anguita, Davide
Fair Empirical Risk Minimization Revised Proceedings Article
In: Rojas, Ignacio; Joya, Gonzalo; Catala, Andreu (Ed.): Advances in Computational Intelligence.
@inproceedings{10.1007/978-3-031-43085-5_3,
title = {Fair Empirical Risk Minimization Revised},
author = {Danilo Franco and Luca Oneto and Davide Anguita},
editor = {Ignacio Rojas and Gonzalo Joya and Andreu Catala},
booktitle = {"Advances in Computational Intelligence"},
abstract = {"Artificial Intelligence is nowadays ubiquitous, thanks to a continuous process of commodification, revolutionizing but also impacting society at large. In this paper, we address the problem of algorithmic fairness in Machine Learning: ensuring that sensitive information does not unfairly influence the outcome of a classifier. We extend the Fair Empirical Risk Minimization framework [10] where the fair risk minimizer is estimated via constrained empirical risk minimization. In particular, we first propose a new, more general, notion of fairness which translates into a fairness constraint. Then, we propose a new convex relaxation with stronger consistency properties deriving both risk and fairness bounds. By extending our approach to kernel methods, we will also show that the proposal empirically over-performs the state-of-the-art Fair Empirical Risk Minimization approach on several real-world datasets."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
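As a rough illustration of penalized fair ERM, the sketch below trains a logistic model by gradient descent with a convex penalty on the score gap between sensitive groups. This penalty is one common relaxation assumed here for illustration; it is not necessarily the paper's exact fairness constraint.

# Sketch: empirical risk minimization plus a convex fairness penalty,
# assuming a binary sensitive attribute s; data and weighting are toy values.
import numpy as np

rng = np.random.default_rng(0)
n, d = 500, 5
X = rng.normal(size=(n, d))
y = rng.integers(0, 2, size=n)
s = rng.integers(0, 2, size=n)  # sensitive attribute

w = np.zeros(d)
lam, lr = 1.0, 0.1
for _ in range(500):
    scores = X @ w
    p = 1.0 / (1.0 + np.exp(-scores))
    grad_risk = X.T @ (p - y) / n                        # logistic-loss gradient
    gap = scores[s == 0].mean() - scores[s == 1].mean()  # group score gap
    grad_fair = 2 * gap * (X[s == 0].mean(axis=0) - X[s == 1].mean(axis=0))
    w -= lr * (grad_risk + lam * grad_fair)              # penalized ERM step

print("final group score gap:", (X[s == 0] @ w).mean() - (X[s == 1] @ w).mean())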
Caffagni, Davide; Barraco, Manuele; Cornia, Marcella; Baraldi, Lorenzo; Cucchiara, Rita
SynthCap: Augmenting Transformers with Synthetic Data for Image Captioning Proceedings Article
In: Foresti, Gian Luca; Fusiello, Andrea; Hancock, Edwin (Ed.): Image Analysis and Processing – ICIAP 2023, 2023.
@inproceedings{10.1007/978-3-031-43148-7_10,
title = {SynthCap: Augmenting Transformers with Synthetic Data for Image Captioning},
author = {Davide Caffagni and Manuele Barraco and Marcella Cornia and Lorenzo Baraldi and Rita Cucchiara},
editor = {Gian Luca Foresti and Andrea Fusiello and Edwin Hancock},
booktitle = {"Image Analysis and Processing – ICIAP 2023"},
abstract = {"Image captioning is a challenging task that combines Computer Vision and Natural Language Processing to generate descriptive and accurate textual descriptions for input images. Research efforts in this field mainly focus on developing novel architectural components to extend image captioning models and using large-scale image-text datasets crawled from the web to boost final performance. In this work, we explore an alternative to web-crawled data and augment the training dataset with synthetic images generated by a latent diffusion model. In particular, we propose a simple yet effective synthetic data augmentation framework that is capable of significantly improving the quality of captions generated by a standard Transformer-based model, leading to competitive results on the COCO dataset."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
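The augmentation recipe is straightforward to prototype. A minimal sketch follows, assuming Hugging Face diffusers and a public Stable Diffusion checkpoint as stand-ins for the paper's latent diffusion model; the checkpoint name and caption are illustrative, and captions would in practice be sampled from the training set (e.g. COCO).

# Sketch: augmenting a captioning training set with diffusion-generated
# images; checkpoint and captions are illustrative stand-ins.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

captions = ["a dog catching a frisbee in a park"]  # e.g. sampled from COCO
synthetic_pairs = []
for caption in captions:
    image = pipe(caption, num_inference_steps=30).images[0]
    synthetic_pairs.append((image, caption))  # add the pair to the training set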
Cocchi, Federico; Baraldi, Lorenzo; Poppi, Samuele; Cornia, Marcella; Baraldi, Lorenzo; Cucchiara, Rita
Unveiling the Impact of Image Transformations on Deepfake Detection: An Experimental Analysis Proceedings Article
In: Foresti, Gian Luca; Fusiello, Andrea; Hancock, Edwin (Ed.): Image Analysis and Processing – ICIAP 2023, 2023.
@inproceedings{10.1007/978-3-031-43153-1_29,
title = {Unveiling the Impact of Image Transformations on Deepfake Detection: An Experimental Analysis},
author = {Federico Cocchi and Lorenzo Baraldi and Samuele Poppi and Marcella Cornia and Lorenzo Baraldi and Rita Cucchiara},
editor = {Gian Luca Foresti and Andrea Fusiello and Edwin Hancock},
booktitle = {"Image Analysis and Processing – ICIAP 2023"},
abstract = {"With the recent explosion of interest in visual Generative AI, the field of deepfake detection has gained a lot of attention. In fact, deepfake detection might be the only measure to counter the potential proliferation of generated media in support of fake news and its consequences. While many of the available works limit the detection to a pure and direct classification of fake versus real, this does not translate well to a real-world scenario. Indeed, malevolent users can easily apply post-processing techniques to generated content, changing the underlying distribution of fake data. In this work, we provide an in-depth analysis of the robustness of a deepfake detection pipeline, considering different image augmentations, transformations, and other pre-processing steps. These transformations are only applied in the evaluation phase, thus simulating a practical situation in which the detector is not trained on all the possible augmentations that can be used by the attacker. In particular, we analyze the performance of a $$k$$-NN and a linear probe detector on the COCOFake dataset, using image features extracted from pre-trained models, like CLIP and DINO. Our results demonstrate that while the CLIP visual backbone outperforms DINO in deepfake detection with no augmentation, its performance varies significantly in presence of any transformation, favoring the robustness of DINO."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
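The evaluation protocol described, training a probe on clean features and corrupting images only at test time, can be sketched as follows. CLIP feature extraction uses the Hugging Face transformers API; JPEG compression stands in for the paper's full suite of transformations, and dataset loading is omitted.

# Sketch: linear-probe deepfake detection on CLIP features, with a
# transformation applied only at evaluation time. Dataset loading omitted.
import io
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
from sklearn.linear_model import LogisticRegression

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def clip_features(images):
    inputs = processor(images=images, return_tensors="pt")
    with torch.no_grad():
        return model.get_image_features(**inputs).numpy()

def jpeg_compress(image, quality=30):  # test-time corruption only
    buf = io.BytesIO()
    image.save(buf, format="JPEG", quality=quality)
    buf.seek(0)
    return Image.open(buf).convert("RGB")

# probe = LogisticRegression().fit(clip_features(train_images), train_labels)
# acc = probe.score(clip_features([jpeg_compress(im) for im in test_images]),
#                   test_labels)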
Baraldi, Lorenzo; Cocchi, Federico; Cornia, Marcella; Baraldi, Lorenzo; Nicolosi, Alessandro; Cucchiara, Rita
Contrasting Deepfakes Diffusion via Contrastive Learning and Global-Local Similarities Proceedings Article
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, 2024.
@inproceedings{10.1007/978-3-031-73036-8_12,
title = {Contrasting Deepfakes Diffusion via Contrastive Learning and Global-Local Similarities},
author = {Lorenzo Baraldi and Federico Cocchi and Marcella Cornia and Lorenzo Baraldi and Alessandro Nicolosi and Rita Cucchiara},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
booktitle = {"Computer Vision – ECCV 2024"},
abstract = {"Discerning between authentic content and that generated by advanced AI methods has become increasingly challenging. While previous research primarily addresses the detection of fake faces, the identification of generated natural images has only recently surfaced. This prompted the recent exploration of solutions that employ foundation vision-and-language models, like CLIP. However, the CLIP embedding space is optimized for global image-to-text alignment and is not inherently designed for deepfake detection, neglecting the potential benefits of tailored training and local image features. In this study, we propose CoDE (Contrastive Deepfake Embeddings), a novel embedding space specifically designed for deepfake detection. CoDE is trained via contrastive learning by additionally enforcing global-local similarities. To sustain the training of our model, we generate a comprehensive dataset that focuses on images generated by diffusion models and encompasses a collection of 9.2 million images produced by using four different generators. Experimental results demonstrate that CoDE achieves state-of-the-art accuracy on the newly collected dataset, while also showing excellent generalization capabilities to unseen image generators. Our source code, trained models, and collected dataset are publicly available at: https://github.com/aimagelab/CoDE."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
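A hedged sketch of the training objective: an InfoNCE-style contrastive loss on global embeddings plus a second term on local (crop-level) embeddings. The tensor layout and weighting are illustrative assumptions, not the exact CoDE formulation (see the linked repository for that).

# Sketch: contrastive loss with an added global-local similarity term;
# names and the alpha weighting are illustrative assumptions.
import torch
import torch.nn.functional as F

def info_nce(a, b, temperature=0.07):
    a, b = F.normalize(a, dim=-1), F.normalize(b, dim=-1)
    logits = a @ b.t() / temperature      # pairwise cosine similarities
    targets = torch.arange(a.size(0))     # positives sit on the diagonal
    return F.cross_entropy(logits, targets)

def code_style_loss(global_a, global_b, local_a, local_b, alpha=0.5):
    # Global term contrasts whole-image embeddings; the local term enforces
    # the same structure on crop-level embeddings of the same pairs.
    return info_nce(global_a, global_b) + alpha * info_nce(local_a, local_b)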
Cappelletti, Silvia; Baraldi, Lorenzo; Cocchi, Federico; Cornia, Marcella; Baraldi, Lorenzo; Cucchiara, Rita
Adapt to Scarcity: Few-Shot Deepfake Detection via Low-Rank Adaptation Proceedings Article
In: Antonacopoulos, Apostolos; Chaudhuri, Subhasis; Chellappa, Rama; Liu, Cheng-Lin; Bhattacharya, Saumik; Pal, Umapada (Ed.): Pattern Recognition.
@inproceedings{10.1007/978-3-031-78305-0_8,
title = {Adapt to Scarcity: Few-Shot Deepfake Detection via Low-Rank Adaptation},
author = {Silvia Cappelletti and Lorenzo Baraldi and Federico Cocchi and Marcella Cornia and Lorenzo Baraldi and Rita Cucchiara},
editor = {Apostolos Antonacopoulos and Subhasis Chaudhuri and Rama Chellappa and Cheng-Lin Liu and Saumik Bhattacharya and Umapada Pal},
booktitle = {"Pattern Recognition"},
abstract = {"The boundary between AI-generated images and real photographs is becoming increasingly narrow, thanks to the realism provided by contemporary generative models. Such technological progress necessitates the evolution of existing deepfake detection algorithms to counter new threats and protect the integrity of perceived reality. Although the prevailing approach among deepfake detection methodologies relies on large collections of generated and real data, the efficacy of these methods in adapting to scenarios characterized by data scarcity remains uncertain. This obstacle arises due to the introduction of novel generation algorithms and proprietary generative models that impose restrictions on access to large-scale datasets, thereby constraining the availability of generated images. In this paper, we first analyze how the performance of current deepfake methodologies, based on the CLIP embedding space, adapt in a few-shot situation over four state-of-the-art generators. Being the CLIP embedding space not specifically tailored for the task, a fine-tuning stage is desirable, although the amount of data needed is often unavailable in a data scarcity scenario. To address this issue and limit possible overfitting, we introduce a novel approach through the Low-Rank Adaptation (LoRA) of the CLIP architecture, tailored for few-shot deepfake detection scenarios. Remarkably, the LoRA-modified CLIP, even when fine-tuned with merely 50 pairs of real and fake images, surpasses the performance of all evaluated deepfake detection models across the tested generators. Additionally, when LoRA CLIP is benchmarked against other models trained on 1,000 samples and evaluated on generative models not seen during training it exhibits superior generalization capabilities."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
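Low-Rank Adaptation of CLIP is easy to reproduce in outline with the peft library. The sketch below wraps the Hugging Face CLIP vision encoder with LoRA adapters; the rank and target modules are illustrative assumptions rather than the paper's exact configuration.

# Sketch: LoRA adapters on the CLIP vision encoder for few-shot fine-tuning;
# rank and target modules are illustrative choices.
from peft import LoraConfig, get_peft_model
from transformers import CLIPVisionModel

backbone = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")
config = LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"])
model = get_peft_model(backbone, config)
model.print_trainable_parameters()  # only the low-rank adapters are trained
# Fine-tune on e.g. 50 real/fake pairs with a small classification head on top.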
Quinzan, Francesco; Casolo, Cecilia; Muandet, Krikamol; Luo, Yucen; Kilbertus, Niki
Learning Counterfactually Invariant Predictors Journal Article
@article{Quinzan2024,
title = {Learning Counterfactually Invariant Predictors},
author = {Francesco Quinzan and Cecilia Casolo and Krikamol Muandet and Yucen Luo and Niki Kilbertus},
url = {"https://publications.cispa.de/articles/journal_contribution/Learning_Counterfactually_Invariant_Predictors_/26818285"},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
König, Matthias; Zhang, Xiyue; Hoos, Holger H.; Kwiatkowska, Marta; van Rijn, Jan N.
Automated Design of Linear Bounding Functions for Sigmoidal Nonlinearities in Neural Networks Proceedings Article
In: Bifet, Albert; Davis, Jesse; Krilavičius, Tomas; Kull, Meelis; Ntoutsi, Eirini; Žliobaitė, Indrė (Ed.): Machine Learning and Knowledge Discovery in Databases. Research Track.
@inproceedings{10.1007/978-3-031-70368-3_23,
title = {Automated Design of Linear Bounding Functions for Sigmoidal Nonlinearities in Neural Networks},
author = {Matthias König and Xiyue Zhang and Holger H. Hoos and Marta Kwiatkowska and Jan N. van Rijn},
editor = {Albert Bifet and Jesse Davis and Tomas Krilavičius and Meelis Kull and Eirini Ntoutsi and Indrė Žliobaitė},
booktitle = {"Machine Learning and Knowledge Discovery in Databases. Research Track"},
abstract = {"The ubiquity of deep learning algorithms in various applications has amplified the need for assuring their robustness against small input perturbations such as those occurring in adversarial attacks. Existing complete verification techniques offer provable guarantees for all robustness queries but struggle to scale beyond small neural networks. To overcome this computational intractability, incomplete verification methods often rely on convex relaxation to over-approximate the nonlinearities in neural networks."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
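The core object here, a pair of linear functions that sandwich the sigmoid on an input interval, can be written down directly when the interval stays within the sigmoid's convex or concave region. The sketch below uses a chord and a midpoint tangent; the paper's contribution is the automated design and optimization of such bounds, which this simple construction only hints at.

# Sketch: linear upper/lower bounds (slope, intercept) for the sigmoid on
# [l, u] within its convex (x <= 0) or concave (x >= 0) region.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def linear_bounds(l, u):
    # Chord through (l, sigmoid(l)) and (u, sigmoid(u)).
    k_chord = (sigmoid(u) - sigmoid(l)) / (u - l)
    b_chord = sigmoid(l) - k_chord * l
    # Tangent at the midpoint, using sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    m = 0.5 * (l + u)
    k_tan = sigmoid(m) * (1 - sigmoid(m))
    b_tan = sigmoid(m) - k_tan * m
    if l >= 0:    # concave region: chord lies below, tangent above
        return (k_chord, b_chord), (k_tan, b_tan)
    elif u <= 0:  # convex region: tangent lies below, chord above
        return (k_tan, b_tan), (k_chord, b_chord)
    raise ValueError("interval crosses 0; a single chord/tangent pair is not enough")

lower, upper = linear_bounds(0.5, 2.0)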
Loiseau, Thibaut; Vu, Tuan-Hung; Chen, Mickael; Pérez, Patrick; Cord, Matthieu
Reliability in Semantic Segmentation: Can We Use Synthetic Data? Proceedings Article
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, 2024.
@inproceedings{10.1007/978-3-031-73337-6_25,
title = {Reliability in Semantic Segmentation: Can We Use Synthetic Data?},
author = {Thibaut Loiseau and Tuan-Hung Vu and Mickael Chen and Patrick Pérez and Matthieu Cord},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
booktitle = {Computer Vision – ECCV 2024},
abstract = {"Assessing the robustness of perception models to covariate shifts and their ability to detect out-of-distribution (OOD) inputs is crucial for safety-critical applications such as autonomous vehicles. By nature of such applications, however, the relevant data is difficult to collect and annotate. In this paper, we show for the first time how synthetic data can be specifically generated to assess comprehensively the real-world reliability of semantic segmentation models. By fine-tuning Stable Diffusion [31] with only in-domain data, we perform zero-shot generation of visual scenes in OOD domains or inpainted with OOD objects. This synthetic data is employed to evaluate the robustness of pretrained segmenters, thereby offering insights into their performance when confronted with real edge cases. Through extensive experiments, we demonstrate a high correlation between the performance of models when evaluated on our synthetic OOD data and when evaluated on real OOD inputs, showing the relevance of such virtual testing. Furthermore, we demonstrate how our approach can be utilized to enhance the calibration and OOD detection capabilities of segmenters. Code and dataare made public."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
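The OOD-object generation step can be approximated with an off-the-shelf inpainting pipeline. A minimal sketch, assuming Hugging Face diffusers, a public inpainting checkpoint in place of the paper's fine-tuned model, and illustrative file names.

# Sketch: inpainting an out-of-distribution object into a driving scene;
# checkpoint, prompt, and file paths are illustrative stand-ins.
import torch
from PIL import Image
from diffusers import StableDiffusionInpaintPipeline

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
).to("cuda")

scene = Image.open("street_scene.png").convert("RGB")
mask = Image.open("road_region_mask.png")  # white where the object is painted
ood_image = pipe(prompt="a large boulder on the road",
                 image=scene, mask_image=mask).images[0]
ood_image.save("synthetic_ood_case.png")   # feed to the segmenter under test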
Lu, Lorenzo Li; D'Ascenzi, Giulia; Borlino, Francesco Cappio; Tommasi, Tatiana
Large Class Separation is Not What You Need for Relational Reasoning-Based OOD Detection Proceedings Article
In: Foresti, Gian Luca; Fusiello, Andrea; Hancock, Edwin (Ed.): Image Analysis and Processing – ICIAP 2023, 2023.
@inproceedings{10.1007/978-3-031-43153-1_25,
title = {Large Class Separation is Not What You Need for Relational Reasoning-Based OOD Detection},
author = {Lorenzo Li Lu and Giulia D'Ascenzi and Francesco Cappio Borlino and Tatiana Tommasi},
editor = {Gian Luca Foresti and Andrea Fusiello and Edwin Hancock},
booktitle = {Image Analysis and Processing – ICIAP 2023},
abstract = {"Standard recognition approaches are unable to deal with novel categories at test time. Their overconfidence on the known classes makes the predictions unreliable for safety-critical applications such as healthcare or autonomous driving. Out-Of-Distribution (OOD) detection methods provide a solution by identifying semantic novelty. Most of these methods leverage a learning stage on the known data, which means training (or fine-tuning) a model to capture the concept of normality. This process is clearly sensitive to the amount of available samples and might be computationally expensive for on-board systems. A viable alternative is that of evaluating similarities in the embedding space produced by large pre-trained models without any further learning effort. We focus exactly on such a fine-tuning-free OOD detection setting."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
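A fine-tuning-free detector of the kind discussed reduces to similarity scoring in a frozen embedding space. The sketch below computes class prototypes from known data and flags samples with low maximum similarity as OOD; it is a generic nearest-prototype baseline under that setting, not the paper's relational-reasoning method.

# Sketch: fine-tuning-free OOD scoring via cosine similarity to class
# prototypes in a frozen embedding space; features here are toy data.
import numpy as np

def l2_normalize(x):
    return x / np.linalg.norm(x, axis=-1, keepdims=True)

def ood_score(test_feats, known_feats, known_labels):
    prototypes = np.stack([known_feats[known_labels == c].mean(axis=0)
                           for c in np.unique(known_labels)])
    sims = l2_normalize(test_feats) @ l2_normalize(prototypes).T
    return 1.0 - sims.max(axis=1)  # higher score = more likely OOD

rng = np.random.default_rng(0)
feats = rng.normal(size=(100, 64))      # embeddings of known-class samples
labels = rng.integers(0, 5, size=100)
print(ood_score(rng.normal(size=(3, 64)), feats, labels))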
Biswas, Sayan; Jung, Kangsoo; Palamidessi, Catuscia
Tight Differential Privacy Guarantees for the Shuffle Model with k-Randomized Response Proceedings Article
In: Mosbah, Mohamed; Sèdes, Florence; Tawbi, Nadia; Ahmed, Toufik; Boulahia-Cuppens, Nora; Garcia-Alfaro, Joaquin (Ed.): Foundations and Practice of Security.
@inproceedings{10.1007/978-3-031-57537-2_27,
title = {Tight Differential Privacy Guarantees for the Shuffle Model with k-Randomized Response},
author = {Sayan Biswas and Kangsoo Jung and Catuscia Palamidessi},
editor = {Mohamed Mosbah and Florence Sèdes and Nadia Tawbi and Toufik Ahmed and Nora Boulahia-Cuppens and Joaquin Garcia-Alfaro},
booktitle = {"Foundations and Practice of Security"},
abstract = {"Most differentially private algorithms assume a central model in which a reliable third party inserts noise to queries made on datasets, or a local model where the data owners directly perturb their data. However, the central model is vulnerable via a single point of failure, and the local model has the disadvantage that the utility of the data deteriorates significantly. The recently proposed shuffle model is an intermediate framework between the central and local paradigms. In the shuffle model, data owners send their locally privatized data to a server where messages are shuffled randomly, making it impossible to trace the link between a privatized message and the corresponding sender. In this paper, we theoretically derive the tightest known differential privacy guarantee for the shuffle models with k-Randomized Response (k-RR) local randomizers, under histogram queries, and we denoise the histogram produced by the shuffle model using the matrix inversion method to evaluate the utility of the privacy mechanism. We perform experiments on both synthetic and real data to compare the privacy-utility trade-off of the shuffle model with that of the central one privatized by adding the state-of-the-art Gaussian noise to each bin. We see that the difference in statistical utilities between the central and the shuffle models shows that they are almost comparable under the same level of differential privacy protection."},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
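The pipeline in the abstract, local k-RR randomization, shuffling, and matrix-inversion denoising of the resulting histogram, fits in a few lines. A sketch follows with illustrative parameters; epsilon, k, and the data are toy values, and the privacy analysis itself is the paper's contribution, not shown here.

# Sketch: k-randomized response, shuffling, and histogram denoising by
# inverting the k-RR channel matrix; all parameters are toy values.
import numpy as np

rng = np.random.default_rng(0)
k, eps, n = 5, 1.0, 10000
data = rng.integers(0, k, size=n)                  # true categorical data

p = np.exp(eps) / (np.exp(eps) + k - 1)            # k-RR keep-probability
q = (1 - p) / (k - 1)                              # probability of each other value

keep = rng.random(n) < p
offset = rng.integers(1, k, size=n)                # uniform over the other k-1 values
noisy = np.where(keep, data, (data + offset) % k)
rng.shuffle(noisy)                                 # the shuffler breaks linkability

observed = np.bincount(noisy, minlength=k).astype(float)
C = np.full((k, k), q) + np.eye(k) * (p - q)       # k-RR channel matrix
estimate = np.linalg.inv(C) @ observed             # matrix-inversion denoising
print(np.round(estimate), np.bincount(data, minlength=k))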