<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xml:lang="en" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMC</journal-id>
<journal-id journal-id-type="nlm-ta">CMC</journal-id>
<journal-id journal-id-type="publisher-id">CMC</journal-id>
<journal-title-group>
<journal-title>Computers, Materials &#x0026; Continua</journal-title>
</journal-title-group>
<issn pub-type="epub">1546-2226</issn>
<issn pub-type="ppub">1546-2218</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">72840</article-id>
<article-id pub-id-type="doi">10.32604/cmc.2025.072840</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Advancing Android Ransomware Detection with Hybrid AutoML and Ensemble Learning Approaches</article-title>
<alt-title alt-title-type="left-running-head">Advancing Android Ransomware Detection with Hybrid AutoML and Ensemble Learning Approaches</alt-title>
<alt-title alt-title-type="right-running-head">Advancing Android Ransomware Detection with Hybrid AutoML and Ensemble Learning Approaches</alt-title>
</title-group>
<contrib-group>
<contrib id="author-1" contrib-type="author">
<name name-style="western"><surname>Ganapathiyappan</surname><given-names>Kirubavathi</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western"><surname>Ravikumar</surname><given-names>Chahana</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western"><surname>Ranganayaki</surname><given-names>Raghul Alagunachimuthu</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-4" contrib-type="author">
<name name-style="western"><surname>Altameem</surname><given-names>Ayman</given-names></name><xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-5" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Rehman</surname><given-names>Ateeq Ur</given-names></name><xref ref-type="aff" rid="aff-3">3</xref><xref rid="cor1" ref-type="corresp">&#x002A;</xref><email>202411144@gachon.ac.kr</email></contrib>
<contrib id="author-6" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Almogren</surname><given-names>Ahmad</given-names></name><xref ref-type="aff" rid="aff-4">4</xref><xref rid="cor1" ref-type="corresp">&#x002A;</xref><email>ahalmogren@ksu.edu.sa</email></contrib>
<aff id="aff-1"><label>1</label><institution>Department of Mathematics, Amrita School of Physical Sciences, Coimbatore, Amrita Vishwa Vidyapeetham</institution>, <addr-line>Coimbatore, 641112</addr-line>, <country>India</country></aff>
<aff id="aff-2"><label>2</label><institution>Department of Computer Science and Engineering, College of Applied Studies, King Saud University</institution>, <addr-line>Riyadh, 11543</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-3"><label>3</label><institution>School of Computing, Gachon University</institution>, <addr-line>Seongnam-si, 13120</addr-line>, <country>Republic of Korea</country></aff>
<aff id="aff-4"><label>4</label><institution>Department of Computer Science, College of Computer and Information Sciences, King Saud University</institution>, <addr-line>Riyadh, 11633</addr-line>, <country>Saudi Arabia</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Authors: Ateeq Ur Rehman. Email: <email>202411144@gachon.ac.kr</email>; Ahmad Almogren. Email:
<email>ahalmogren@ksu.edu.sa</email></corresp>
</author-notes>
<pub-date date-type="collection" publication-format="electronic">
<year>2026</year>
</pub-date>
<pub-date date-type="pub" publication-format="electronic">
<day>10</day><month>2</month><year>2026</year>
</pub-date>
<volume>87</volume>
<issue>1</issue>
<elocation-id>27</elocation-id>
<history>
<date date-type="received">
<day>04</day>
<month>09</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 The Authors.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Published by Tech Science Press.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMC_72840.pdf"></self-uri>
<abstract>
<p>Android smartphones have become an integral part of our daily lives, becoming targets for ransomware attacks. Such attacks encrypt user information and ask for payment to recover it. Conventional detection mechanisms, such as signature-based and heuristic techniques, often fail to detect new and polymorphic ransomware samples. To address this challenge, we employed various ensemble classifiers, such as Random Forest, Gradient Boosting, Bagging, and AutoML models. We aimed to showcase how AutoML can automate processes such as model selection, feature engineering, and hyperparameter optimization, to minimize manual effort while ensuring or enhancing performance compared to traditional approaches. We tested this framework on a publicly available dataset from the Kaggle repository, which contains features for Android ransomware network traffic. The dataset comprises 392,034 flow records, divided into eleven groups. There are ten classes for various ransomware types, including SVpeng, PornDroid, Koler, WannaLocker, and Lockerpin. There is also a class for regular traffic. We applied a three-step procedure to select the most relevant features: filter, wrapper, and embedded methods. The Bagging classifier was highly accurate, correctly classifying samples 99.84% of the time. The FLAML AutoML framework was even more accurate, correctly classifying samples 99.85% of the time. This is indicative of how well AutoML performs with minimal human assistance. Our findings indicate that AutoML is an efficient, scalable, and flexible method to discover Android ransomware, and it will facilitate the development of next-generation intrusion detection systems.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Automated machine learning (AutoML)</kwd>
<kwd>ensemble learning</kwd>
<kwd>intrusion detection system (IDS)</kwd>
<kwd>ransomware traffic analysis</kwd>
<kwd>Android ransomware detection</kwd>
</kwd-group>
<funding-group>
<award-group id="awg1">
<funding-source>Ongoing Research Funding Program</funding-source>
<award-id>ORF-2025-498</award-id>
</award-group>
<award-group id="awg2">
<funding-source>King Saud University</funding-source>
</award-group>
</funding-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>In the virtual world, Android phones are now an integral part of our daily lives. They are vital to business, medicine, entertainment, finance, and communication. This critical role demonstrates the importance of ensuring adequate security protection. The Internet is linking more and more devices and services, which means that the number and sophistication of cyberattacks with which we must deal are increasing. Malware attacks are among the most significant threats, as they generate billions of dollars in losses worldwide [<xref ref-type="bibr" rid="ref-1">1</xref>,<xref ref-type="bibr" rid="ref-2">2</xref>]. Ransomware is currently among the most destructive forms of attack. There are numerous forms of this, such as crypto-ransomware and lock-screen ransomware. The Lock-screen ransomware blocks you from accessing by displaying imitation warning messages. However, Crypto-ransomware encrypts valuable documents, photos, and videos, making them unrecoverable without a decryption key [<xref ref-type="bibr" rid="ref-3">3</xref>,<xref ref-type="bibr" rid="ref-4">4</xref>]. Simplocker, LockerPin, and WannaLocker are among the popular sets of ransomware that illustrate how attacks are becoming increasingly sophisticated and perilous. There is a need to develop effective methods for detecting cyber threats, as they can cause significant harm and are becoming increasingly difficult to identify.</p>
<p>The primary methods by which traditional identification methods perform well are the use of signatures and heuristics. They are suitable for known threats but not very effective for polymorphic or new ones [<xref ref-type="bibr" rid="ref-5">5</xref>,<xref ref-type="bibr" rid="ref-6">6</xref>]. Static analysis techniques are not effective, as attackers often employ code obfuscation and encryption to bypass security measures [<xref ref-type="bibr" rid="ref-7">7</xref>]. Dynamic analysis is another option, but it requires sandbox testing and substantial computational power, making it difficult to scale in real-time [<xref ref-type="bibr" rid="ref-8">8</xref>,<xref ref-type="bibr" rid="ref-9">9</xref>]. The area of machine learning (ML)-based ransomware detection has grown significantly, as it can identify both traditional and novel attack patterns by examining distinctive behavioral characteristics [<xref ref-type="bibr" rid="ref-10">10</xref>]. However, all the ML models we currently have utilize a single classifier, which is not always effective in terms of generalization and is not particularly efficient against various types of ransomware. Ensemble learning has improved accuracy, but requires manual selection of features and optimization of hyperparameters, making it less flexible and efficient [<xref ref-type="bibr" rid="ref-11">11</xref>]. Additionally, it is challenging to identify threats early on, as most approaches do not detect attacks until significant damage has been caused [<xref ref-type="bibr" rid="ref-12">12</xref>]. In this research, a thorough comparative study is conducted among conventional ensemble-based techniques (such as Random Forest, Gradient Boosting, and Bagging) and Automated Machine Learning (AutoML) systems (like TPOT, EvalML, and FLAML) to address these challenges. AutoML, on the other hand, automatically finds the best models, simplifies hyperparameter tuning, and improves generalization. 
This differs from typical ML processes, which require manual optimization. The goal of this study is to show that AutoML can automate the model-building process and match or beat the performance of traditional ensemble classifiers. This would be an effective and scalable method for discovering Android ransomware. The key contributions of this work are summarized as follows:
<list list-type="bullet">
<list-item>
<p><bold>Automated Detection Pipeline:</bold> We introduce a completely automated Android ransomware detection system that combines a three-stage hybrid feature selection approach (filter, wrapper, and embedded methods) with AutoML-based classification, thus minimizing the need for manual configuration and enhancing performance.</p></list-item>
<list-item>
<p><bold>Reorganizing Classes in a Hierarchical Manner:</bold> Through the application of hierarchical clustering to consolidate ransomware families into groups that behave similarly, generalization can be enhanced, class imbalance minimized, and training simplified.</p></list-item>
<list-item>
<p><bold>Strict Baseline Comparison:</bold> Stratified K-fold cross-validation is used to test rigorous traditional ensemble models to ensure that they are fair and reliable. The results show that AutoML-powered models, particularly FLAML, have better detection accuracy than traditional ensemble baselines, demonstrating their ability to adapt to evolving cyber-attack patterns.</p></list-item>
<list-item>
<p><bold>Evaluation:</bold> We offer complete results, including confusion matrices, ROC curves, learning curves, and importance of permutation characteristics that demonstrate the robustness, reproducibility, and scalability of the proposed framework for real-world applications.</p></list-item>
</list></p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related Works and Research Gap</title>
<p>Current advancements in intrusion detection have heavily relied on machine learning (ML) and deep learning (DL) techniques to identify malicious activity across various topics, including Android ransomware. Intrusion detection systems (IDS) mainly detect threats at the host and network levels. However, their theoretical foundations are highly relevant to the Android environment, where ransomware is a rapidly increasing attack type. Current research on IDS from 2025 [<xref ref-type="bibr" rid="ref-13">13</xref>] suggests that adaptive anomaly detection and multimodal feature utilization are crucial for making systems more resilient against evolving cyber threats. These frameworks were not created solely for the purpose of ransomware. Still, they do illustrate the importance of utilizing detection models that work well in various diverse scenarios and can be automated and scaled. These are also significant concepts in this research. Old-style static analysis techniques, such as feature extraction from app permissions, have been only moderately effective against Android ransomware, especially when the malware is evasive or constantly changing. To address these issues, researchers gradually began to employ dynamic behavioral profiling, i.e., observing API calls, network activity, and system logs during the program&#x2019;s execution. This approach reduces the likelihood that systems will be targeted by sophisticated ransomware and makes it easier to identify new and established methods by which hackers attack.</p>
<p>Numerous research studies have employed the use of ML and DL models to identify ransomware using these concepts. For example, the authors in [<xref ref-type="bibr" rid="ref-14">14</xref>] proposed an ensemble machine learning approach trained on 203,556 network traffic samples, including benign data and ten ransomware families. Their models achieved precision, recall, and F1-scores that were all above 99%, and feature importance analysis revealed significant behavioral features. They further indicated that certain classes had poorer true positive rates and emphasized the necessity for adaptive methods that can deal with new ransomware types.</p>
<p>Meanwhile, most current research on locating Android malware has increasingly emphasized employing both static and dynamic analysis to avoid problems each technique has independently. Static analysis is effective in identifying bugs, but it may not always be successful against code obfuscation or zero-day threats. Dynamic analysis, on the other hand, costs more in terms of time and processing power, but it gives you a better understanding of behavior. In this area, the DL-AMDet framework is a big step forward. It possesses a static detection module that employs CNN-BiLSTM and an autoencoder-based anomaly detection module. This hybrid approach achieved 99.935% accuracy, which was superior to that of the majority of other state-of-the-art models [<xref ref-type="bibr" rid="ref-15">15</xref>]. These frameworks have served us well, but they still demonstrate that there are trade-offs between their accuracy, scalability, and the difficulty of extracting features. These concerns underscore the need for more research into optimally improved hybrid deep learning solutions, primarily through the combination of ensemble techniques and AutoML systems that can automate feature selection, model optimization, and accommodation of the changing nature of malware.</p>
<p>An Automated Android Malware Detection framework (AAMD-OELAC) was proposed in [<xref ref-type="bibr" rid="ref-16">16</xref>]. It integrates LS-SVM, KELM, and RRVFLN with hyperparameter tuning using a hunter-prey algorithm that significantly enhances detection rates. However, although it performed satisfactorily, the system required frequent updates to remain effective in response to the other side&#x2019;s alterations. Ahmed et al. [<xref ref-type="bibr" rid="ref-17">17</xref>] compared 392,035 network traffic records and evaluated DT, SVM, KNN, FNN, and TabNet models for binary classification. The work highlighted issues related to computational complexity and poor generalization, despite SVM achieving a 100% recall rate and DT attaining an accuracy rate of 97.24%. It recommended the usage of a combination of various techniques to improve things. There have also been studies on deep learning methods. Khan et al. [<xref ref-type="bibr" rid="ref-18">18</xref>] proposed an LSTM model based on eight feature selection techniques and majority voting to recognize 19 key features from the CI-CAndMal2017 dataset. The optimized LSTM achieved 97.08% accuracy, surpassing previous benchmarks, but larger datasets are required for further verification. Ali et al. [<xref ref-type="bibr" rid="ref-19">19</xref>] presented MALGRA, a dynamic-analysis-driven malware detection system that extracted API-call N-grams and then applied TF-IDF(Term Frequency-Inverse Document Frequency) for selecting the most discriminative behavioral features. The work compared the performance of several classical machine learning models such as Logistic Regression, Random Forest, Decision Tree, and Naive Bayes and found that logistic regression performed best with an accuracy of 98.4% for malware/benign datasets. 
Interestingly, their results demonstrated that behavioral N-gram features combined with lightweight ML classifiers can outperform many traditional static opcode-based approaches, especially against malware using obfuscation and evasion. AutoML is a game-changing technique in this area only introduced in the recent past. Brown et al. [<xref ref-type="bibr" rid="ref-20">20</xref>] demonstrated the efficacy of AutoML for large-scale malware detection with the SOREL-20M and EMBER-2018 datasets, where AutoML-tuned FFNNs and CNNs outperformed hand-crafted pipelines. Bromberg and Gitzinger [<xref ref-type="bibr" rid="ref-21">21</xref>] developed DroidAutoML, a scalable microservice framework for automatically selecting models and hyperparameters. This was a significant improvement over Drebin and MaMaDroid. All the same, these works pointed out challenges in real-time deployment and the need for adaptive strategies in the face of evolving threat environments. Feature selection has also been a key area of focus. Masum et al. [<xref ref-type="bibr" rid="ref-22">22</xref>] coupled DT, RF, NB, LR, and NN classifiers with feature selection. They proved that Random Forest had higher accuracy, F-beta, and precision measures. Khammas [<xref ref-type="bibr" rid="ref-23">23</xref>] developed a static-analysis technique which operates on raw bytes and employs Gain Ratio to identify the best 1000 n-gram features with an accuracy of 97.74%. Although these results were excellent, it was more challenging to generalize against obfuscated binaries, as it employed only static techniques.</p>
<p>Despite these advances, several issues remain to be solved. Most modern approaches rely on either static or dynamic analysis, which are inefficient against obfuscation techniques or run slowly, making them less effective. Many machine learning models are still based on single classifiers or basic ensembles with hyperparameters that have been manually set. This makes it more difficult to scale and protect against new attacks. Additionally, previous research often utilizes datasets that are too small or outdated, rendering them less applicable in practice. Even ensemble approaches tend not to have automation or explainability, which makes them less effective for operational security. <xref ref-type="table" rid="table-1">Table 1</xref> provides an overview of significant works, focusing on their datasets, classification strategies, performance measures, and key issues. The table indicates that the majority of recent approaches rely heavily on static analysis or specific traffic datasets, making it challenging for them to handle new types of ransomware. Ensemble and deep learning models have enhanced detection precision but tend to require manual feature engineering and hyperparameter adjustment, rendering them much less scalable as threats constantly evolve. Existing research on AutoML-based techniques holds promise but primarily targets general malware detection. It does not frequently utilize class reorganization or hybrid feature selection to address class imbalance and high-dimensional data issues. To address these issues, this research provides an end-to-end and scalable Android ransomware detection mechanism based on hierarchical class grouping, a three-phase hybrid feature selection process, and ensemble learning powered by AutoML. This mechanism is designed to enhance accuracy, flexibility, and replicability while minimizing human intervention, thereby creating a more effective defense mechanism in practical scenarios.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Summary of related works on Android ransomware detection, including datasets, methods, results, and limitations</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Reference</th>
<th>Major contribution</th>
<th>Classi. type</th>
<th>No. of features</th>
<th>No. of samples</th>
<th>Key results</th>
<th>Limitations</th>
</tr>
</thead>
<tbody>
<tr>
<td>Hossain et al. [<xref ref-type="bibr" rid="ref-14">14</xref>]</td>
<td>Ensemble ML for Android ransomware with feature selection</td>
<td>Binary/<break/>Multiclass</td>
<td>85 (10 selected)</td>
<td>203,556</td>
<td>AdaBoost: 99.1%, Stacking: 98.9%, RF: 98.7%</td>
<td>Focused on static features; generalization to unseen families not tested.</td>
</tr>
<tr>
<td>Alamro et al. [<xref ref-type="bibr" rid="ref-16">16</xref>]</td>
<td>AAMD-OELAC ensemble with metaheuristic tuning</td>
<td>Binary</td>
<td>Auto- engineered</td>
<td>7500</td>
<td>AAMD-OELAC: 96.4% (Hunter&#x2013;Prey optimization)</td>
<td>Small dataset; scalability on large datasets not validated.</td>
</tr>
<tr>
<td>Albin Ahmed et al. [<xref ref-type="bibr" rid="ref-17">17</xref>]</td>
<td>ML/DL models for ransomware via traffic analysis</td>
<td>Binary</td>
<td>85 (19 selected)</td>
<td>392,035 (86,182 balanced)</td>
<td>RF: 95.2%, DT: 94.8% (PSO-selected features)</td>
<td>Limited to traffic-based data; runtime efficiency not evaluated.</td>
</tr>
<tr>
<td>Khan et al. [<xref ref-type="bibr" rid="ref-18">18</xref>]</td>
<td>LSTM-based ransomware detector with feature selection</td>
<td>Binary</td>
<td>19</td>
<td>40,000</td>
<td>CNN&#x2013;LSTM Hybrid: 94.3%</td>
<td>High model complexity; needs larger datasets for generalization.</td>
</tr>
<tr>
<td>Ali et al. [<xref ref-type="bibr" rid="ref-19">19</xref>]</td>
<td>Dynamic-analysis-based ML using API-call N-grams &#x002B; TF&#x2013;IDF</td>
<td>Binary</td>
<td>API-call N-grams (1&#x2013;6) extracted from sandbox logs</td>
<td>120 samples (60 malware, 60 benign)</td>
<td>LR: 98.4%</td>
<td>Strong behavioural features; limited dataset size; relies on sandbox execution.</td>
</tr>
<tr>
<td>Brown et al. [<xref ref-type="bibr" rid="ref-20">20</xref>]</td>
<td>AutoML-based deep learning on large malware datasets</td>
<td>Binary</td>
<td>2381 (EMBER)</td>
<td><inline-formula id="ieqn-1"><mml:math id="mml-ieqn-1"><mml:mo>&#x223C;</mml:mo></mml:math></inline-formula>20M (SOREL-20M)</td>
<td>NAS-optimized CNN: 97.2%</td>
<td>Requires massive datasets and high compute resources.</td>
</tr>
<tr>
<td>Bromberg and Gitzinger<break/>[<xref ref-type="bibr" rid="ref-21">21</xref>]</td>
<td>DroidAutoML for automated model selection/tuning</td>
<td>Binary</td>
<td>24&#x2013;82 (reduced)</td>
<td>Millions (6 datasets)</td>
<td>AutoML RF: 96.1%, KNN: 94.3%</td>
<td>Limited validation on Android ransomware datasets.</td>
</tr>
<tr>
<td>Masum et al. [<xref ref-type="bibr" rid="ref-22">22</xref>]</td>
<td>Feature-selection framework for ransomware detection</td>
<td>Multi-class</td>
<td>Not specified</td>
<td>Not specified</td>
<td>RF (Accuracy not clearly reported)</td>
<td>Dataset details unclear; reproducibility issues.</td>
</tr>
<tr>
<td>Khammas<break/>[<xref ref-type="bibr" rid="ref-23">23</xref>]</td>
<td>Static byte-level 3-gram analysis with feature selection</td>
<td>Binary</td>
<td>1000 (3-g)</td>
<td>1680</td>
<td>RF: 91.7% (Byte n-grams)</td>
<td>Vulnerable to obfuscation/packing; purely static approach.</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3">
<label>3</label>
<title>Proposed Framework</title>
<p>In this research, a fully automated framework overcomes the shortcomings of conventional Android ransomware detection by comparing rigorous ensemble learning algorithms with state-of-the-art AutoML algorithms such as FLAML, TPOT, and EvalML. AutoML streamlines the process by automatically selecting a model, extracting features, and tuning hyperparameters. This contrasts with manually building pipelines and tuning them. This automation reduces the number of personnel required and enables the handling of the ransomware threat&#x2019;s dynamic nature. The system employs a hierarchical clustering approach to categorize ransomware families into broader groups based on their behavior. This method enhances class balance and facilitates straightforward generalization. Additionally, the entire preprocessing pipeline, along with a hybrid feature selection approach in three stages, including filter, wrapper, and embedded techniques, has been utilized. This method is helpful to minimize dimensionality without losing the ability to differentiate between things. The proposed framework offers a twofold perspective by contrasting legacy ensemble methods and pipelines generated by AutoML, which optimize independently from start to end. Experimental results show that AutoML, specifically FLAML, consistently outperforms the best ensemble baselines in terms of accuracy. It also makes notable improvements in efficiency and scalability. This research lays the groundwork for future advancements in improving next-generation ransomware detection systems. <xref ref-type="fig" rid="fig-1">Fig. 1</xref> shows the whole process of the proposed framework for Android ransomware detection.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>Proposed framework for Android ransomware detection using AutoML and ensemble baselines</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-1.tif"/>
</fig>
<sec id="s3_1">
<label>3.1</label>
<title>Materials and Environment Setup</title>
<p>Experiments were conducted partly in a Kaggle notebook environment and partly on a local Windows machine equipped with an Intel Core i7 (14-core) processor and 16 GB of RAM. All experiments were conducted in Python 3.11 using conventional scikit-learn packages, along with state-of-the-art Automated Machine Learning (AutoML) libraries such as FLAML, TPOT, and EvalML, as well as state-of-the-art boosting algorithms including XGBoost, LightGBM, and CatBoost. GPU acceleration was enabled for TPOT with NVIDIA RAPIDS/cuML where possible, and was run in a WSL2 (Windows Subsystem for Linux v2.3) environment. In Kaggle notebooks, GPU acceleration was enabled through the runtime settings of the notebook. The original dataset, after being loaded into memory, consumed approximately 258 MB of RAM. A fixed random seed (42) was used across all experiments to ensure reproducibility of results.</p>
<p>For EvalML, experiments were executed with <bold>EvalML 0.84</bold> and Python 3.11. The core dependencies were NumPy (<inline-formula id="ieqn-2"><mml:math id="mml-ieqn-2"><mml:mo>&#x2265;</mml:mo></mml:math></inline-formula>1.24), pandas (<inline-formula id="ieqn-3"><mml:math id="mml-ieqn-3"><mml:mo>&#x2265;</mml:mo></mml:math></inline-formula>1.5), scikit-learn (<inline-formula id="ieqn-4"><mml:math id="mml-ieqn-4"><mml:mo>&#x2265;</mml:mo></mml:math></inline-formula>1.2), and matplotlib (<inline-formula id="ieqn-5"><mml:math id="mml-ieqn-5"><mml:mo>&#x2265;</mml:mo></mml:math></inline-formula>3.7), in addition to the optional package <monospace>nlp_primitives</monospace> for text/NLP features for Featuretools. The packages were all installed collectively, and the kernel was restarted to prevent potential binary incompatibility issues. The installation command utilized for complete reproducibility is:</p>
<p><monospace>pip install --upgrade pip; pip install numpy&#x003E;&#x003D;1.24 pandas&#x003E;&#x003D;1.5 scikit-learn&#x003E;&#x003D;1.2 matplotlib&#x003E;&#x003D;3.7 evalml&#x003D;&#x003D;0.84 nlp_primitives</monospace></p>
<p>The computer environment and software versions used are listed in <xref ref-type="table" rid="table-2">Table 2</xref>, allowing for precise replication of the reported findings.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>Software and library versions</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Category</th>
<th>Model/Library</th>
<th>Version</th>
</tr>
</thead>
<tbody>
<tr>
<td>Core Ensemble models</td>
<td>Scikit-learn (Random Forest, Gradient Boosting, HistGradient Boosting, Extra Trees, Bagging, AdaBoost, Decision Tree)</td>
<td>1.2.2</td>
</tr>
<tr>
<td rowspan="3">AutoML Framework</td>
<td>EvalML</td>
<td>0.84.0</td>
</tr>
<tr>
<td>FLAML</td>
<td>2.3.4</td>
</tr>
<tr>
<td>TPOT</td>
<td>1.0.0</td>
</tr>
<tr>
<td rowspan="3">Boosting models</td>
<td>XGBoost</td>
<td>2.0.3</td>
</tr>
<tr>
<td>LightGBM</td>
<td>4.5.0</td>
</tr>
<tr>
<td>CatBoost</td>
<td>1.2.7</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Dataset Description</title>
<p>The work in this research utilizes a publicly available Android ransomware dataset from Kaggle [<xref ref-type="bibr" rid="ref-24">24</xref>], which contains both benign and ransomware network traffic samples. The dataset comprises 392,034 network flow records, each described by 86 features, where 81 are numeric and five are categorical. There are 43,091 benign samples and 348,943 ransomware samples representing various Android ransomware families such as Simplocker, LockerPin, and WannaLocker. This allows for an 11-class multi-class classification task where 10 class labels represent different ransomware families and one label represents the benign category. Each sample point refers to a network flow instance and is characterized by flow-based features that encompass connection identifiers, protocol-level features, temporal behaviors, statistical metrics, and TCP flag information. A full preprocessing pipeline was established to ensure data integrity before model development. The dataset was thoroughly checked for duplicates, infinite values, and missing (NaN) values sequentially. Duplicates were not found. Infinite values were substituted with NaN, and after conducting null checks, it was ensured that no rows had to be removed. The final dataset size was therefore not altered, thereby ensuring its completeness and quality. The final dataset held 348,943 ransomware samples (88.99%) and 43,091 benign samples (11.01%), as indicated by <xref ref-type="table" rid="table-3">Table 3</xref>.</p>
<table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>Dataset distribution after preprocessing</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Class</th>
<th>Sample count</th>
<th>Proportion (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td>Ransomware (Grouped Families)</td>
<td>348,943</td>
<td>88.99</td>
</tr>
<tr>
<td>Benign</td>
<td>43,091</td>
<td>11.01</td>
</tr>
<tr>
<td><bold>Total</bold></td>
<td>392,034</td>
<td>100.00</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Initial Approaches</title>
<p>The experimental setup utilizes a multi-stage methodology to tackle the inherent problems involved in Android ransomware classification, namely class imbalance, feature dimensionality, and the need for automated, scalable model optimization. The solution proposed combines three complementary elements: (i) hierarchical clustering-based class restructuring, (ii) a hybrid feature selection pipeline to counter dimensionality, and (iii) exploration of conventional ensemble learning approaches vs. AutoML-based pipelines. Together, these elements constitute a comprehensive solution that not only improves classification accuracy but also demonstrates the practical advantages of automation.</p>
<p><bold>Hierarchical Clustering for Class Grouping:</bold> The ransomware families usually share the same behavioral patterns, which can cause confusion during the classification phase and lead to noise overfitting in a dataset-specific way. To address this limitation, we performed hierarchical clustering to combine ransomware families at both semantic and behavioral levels, based on class-level centroids. This class reformulation has three valuable advantages:
<list list-type="simple">
<list-item><label>1.</label><p><bold>Class Imbalance Mitigation:</bold> Consolidating minority classes with behaviorally related families reduces class imbalance, minimizing bias toward the dominant classes while preserving semantic meaning.</p></list-item>
<list-item><label>2.</label><p><bold>Computational Efficiency:</bold> Reducing the classification problem from 11 to 8 classes decreases training complexity by approximately 27%, resulting in faster training times and reduced computational overhead.</p></list-item>
<list-item><label>3.</label><p><bold>Improved Generalization:</bold> Grouping into broader behavioral categories makes the model robust, enabling better detection of new ransomware specimens of the same behavioral category.</p></list-item>
</list></p>
<p>The preprocessed dataset was transformed into a sparse high-dimensional matrix. Class centroids were computed to represent each ransomware family within the feature space. For a class <inline-formula id="ieqn-6"><mml:math id="mml-ieqn-6"><mml:msub><mml:mi>C</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula> with <inline-formula id="ieqn-7"><mml:math id="mml-ieqn-7"><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula> samples, its centroid <inline-formula id="ieqn-8"><mml:math id="mml-ieqn-8"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula> is computed as:
<disp-formula id="eqn-1"><label>(1)</label><mml:math id="mml-eqn-1" display="block"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mfrac><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:munder><mml:msub><mml:mi>x</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:math></disp-formula>where <inline-formula id="ieqn-9"><mml:math id="mml-ieqn-9"><mml:msub><mml:mi>x</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:math></inline-formula> is the feature vector of a sample belonging to class <inline-formula id="ieqn-10"><mml:math id="mml-ieqn-10"><mml:msub><mml:mi>C</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula>, these centroids are representative mean points for their respective classes. To estimate inter-class similarity, we calculated pairwise Euclidean distances between centroids. 
For two centroids, <inline-formula id="ieqn-11"><mml:math id="mml-ieqn-11"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>a</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> and <inline-formula id="ieqn-12"><mml:math id="mml-ieqn-12"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>b</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, their Euclidean distance is:
<disp-formula id="eqn-2"><label>(2)</label><mml:math id="mml-eqn-2" display="block"><mml:mi>d</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>a</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>b</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msqrt><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:msqrt></mml:math></disp-formula></p>
<p>With these distances, hierarchical agglomerative clustering based on Ward&#x2019;s linkage was employed to iteratively merge the most similar classes. The gain in within-cluster sum of squares (SSE) when merging two clusters <inline-formula id="ieqn-13"><mml:math id="mml-ieqn-13"><mml:msub><mml:mi>C</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-14"><mml:math id="mml-ieqn-14"><mml:msub><mml:mi>C</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:math></inline-formula> is calculated as:<disp-formula id="eqn-3"><label>(3)</label><mml:math id="mml-eqn-3" display="block"><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi>n</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>n</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:msup><mml:mrow><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:math></disp-formula>where <inline-formula id="ieqn-15"><mml:math id="mml-ieqn-15"><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:msup><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:math></inline-formula> is the squared Euclidean distance between cluster centers. 
The resulting dendrogram showed clear behavioral similarities between different ransomware families. This step reduced the number of output classes from 11 to 8, unifying similar families into higher-level behavioral groups.</p>
<p>Clustering was used only on ransomware families. The benign category was left distinct to allow models to continue differentiating between malicious and benign traffic. This method minimizes noise by clustering statistically equivalent ransomware families into broader classes, thereby improving robustness and reducing the possibility of overfitting to small variations within families.</p>
<p>The in-depth outcomes of the clustering process, such as dendrogram visualization and the final grouped class distribution, are discussed in <xref ref-type="sec" rid="s4">Section 4</xref>.</p>
<p><italic>Pre-Processing</italic></p>
<p>Preprocessing of data is necessary to ensure the integrity, consistency, and suitability of the dataset for developing machine learning models. The following steps were utilized uniformly to prepare the Android ransomware dataset:
<list list-type="simple">
<list-item><label>1.</label><p><bold>Elimination of Duplicate Records:</bold> Potential duplicate rows were identified using the <monospace>duplicated()</monospace> command and removed to maintain data integrity and prevent redundancy during model training.</p></list-item>
<list-item><label>2.</label><p><bold>Handling Missing and Infinite Values:</bold> Infinite and large numerical values were substituted with NaN for consistent presentation. Such rows with NaN values were further removed to have a complete and consistent dataset.</p></list-item>
<list-item><label>3.</label><p><bold>Feature Segmentation and Normalization:</bold> The data was separated into numerical and categorical features to make it easier for correct preprocessing. The numerical features were scaled using the assistance of <italic>StandardScaler</italic>, as in <xref ref-type="disp-formula" rid="eqn-4">Eq. (4)</xref>, to enable the characteristics to be compared:
<disp-formula id="eqn-4"><label>(4)</label><mml:math id="mml-eqn-4" display="block"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mrow><mml:mtext>scaled</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>X</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03BC;</mml:mi></mml:mrow><mml:mi>&#x03C3;</mml:mi></mml:mfrac></mml:math></disp-formula>
where <inline-formula id="ieqn-16"><mml:math id="mml-ieqn-16"><mml:mi>&#x03BC;</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-17"><mml:math id="mml-ieqn-17"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> are the mean and standard deviation of each numerical feature, respectively.</p></list-item>
<list-item><label>4.</label><p><bold>Categorical Encoding:</bold> The categorical features were encoded into numerical representations using <italic>LabelEncoder</italic>, preserving category identity without the dimensionality increase that is associated with One-Hot Encoding.</p></list-item>
</list></p>
<p>With these preprocessing steps, the dataset was rendered standardized, noise-free, and well-structured, providing a sound foundation for feature selection, class clustering, and subsequent model training.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Hybrid Feature Selection Strategy</title>
<p>Feature selection is a critical part of machine learning workflows that improves dimensionality reduction, eliminates duplicate or unnecessary features, and improves model performance and computational cost. For these purposes, we employed a three-stage hybrid feature selection strategy that leverages the strengths of filter, wrapper, and embedded methods by capitalizing on their complementary advantages. This approach is mathematically discussed in Algorithm 1.</p>
<fig id="fig-9">
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-9.tif"/>
</fig>
<p>This three-step architecture combines the speed of filter methods and model-agnostic features, the interaction sensitivity of the wrapper method, and the embedded regularization&#x2019;s sparsity-promoting feature. The outcome is a small highly discriminative feature subset that enhances generalization, minimizes overfitting, and minimizes training time. Constant or near-constant-value columns were eliminated in preprocessing, as seen in <xref ref-type="table" rid="table-4">Table 4</xref>, before executing the feature selection pipeline.</p>
<table-wrap id="table-4">
<label>Table 4</label>
<caption>
<title>List of columns with constant values</title>
</caption>
<table>
<colgroup>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th align="center">Feature name</th>
</tr>
</thead>
<tbody>
<tr>
<td>BwdAvgPacketsPerBulk</td>
</tr>
<tr>
<td>FwdURGFlags</td>
</tr>
<tr>
<td>FwdAvgBulkRate</td>
</tr>
<tr>
<td>RSTFlagCount</td>
</tr>
<tr>
<td>ECEFlagCount</td>
</tr>
<tr>
<td>BwdPSHFlags</td>
</tr>
<tr>
<td>BwdURGFlags</td>
</tr>
<tr>
<td>CWEFlagCount</td>
</tr>
<tr>
<td>BwdAvgBytesPerBulk</td>
</tr>
<tr>
<td>FwdAvgBytesPerBulk</td>
</tr>
<tr>
<td>FwdAvgPacketsPerBulk</td>
</tr>
<tr>
<td>BwdAvgBulkRate</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>This was done because qualities with fixed values exhibit no sample-to-sample variation and thus bring no discriminatory power to the model. Additionally, they may lead to an unwanted increase in computational complexity without improving performance.</p>
<p><bold>Step 1: Filter Method.</bold> We started by applying the <monospace>SelectKBest</monospace> method with Mutual Information (<monospace>mutual_info_classif</monospace>) as the scoring metric. This approach assesses the relationship between each feature and the target variable, selecting the top 20 features that yield the greatest information gain in classification.</p>
<p><bold>Step 2: Wrapper Method.</bold> The features retained by the filter method were then refined using Recursive Feature Elimination (RFE) with a Random Forest classifier as the base estimator. RFE recursively removes the least important feature based on model performance until only the top 10 most significant features remain.</p>
<p><bold>Step 3: Embedded Method.</bold> We then employed L1-regularized (Lasso) regression with <monospace>SelectFromModel</monospace> on the outcome of RFE. L1 regularization was selected because it imposes sparsity by shrinking the weights of less informative features exactly to zero, thereby supporting both feature selection and model learning. This helps curb redundancy in the high-dimensional feature space, enhances generalization by reducing the probability of overfitting, and emphasizes the most discriminative features involved in ransomware detection. <xref ref-type="table" rid="table-5">Table 5</xref> presents the features selected at each step of the feature selection pipeline.</p>
<table-wrap id="table-5">
<label>Table 5</label>
<caption>
<title>Features selected by filter, wrapper, and embedded methods</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Filter</th>
<th>Wrapper</th>
<th>Embedded</th>
</tr>
</thead>
<tbody>
<tr>
<td>FlowID</td>
<td>FlowID</td>
<td>SourceIP</td>
</tr>
<tr>
<td>SourceIP</td>
<td>SourceIP</td>
<td>SourcePort</td>
</tr>
<tr>
<td>SourcePort</td>
<td>SourcePort</td>
<td>DestinationIP</td>
</tr>
<tr>
<td>DestinationIP</td>
<td>DestinationIP</td>
<td>TimeStamp</td>
</tr>
<tr>
<td>TimeStamp</td>
<td>TimeStamp</td>
<td>FlowPacketsPerS</td>
</tr>
<tr>
<td>FlowDuration</td>
<td>FlowDuration</td>
<td>FlowIATMean</td>
</tr>
<tr>
<td>TotalLengthOfFwdPackets</td>
<td>FlowPacketsPerS</td>
<td>FlowIATMax</td>
</tr>
<tr>
<td>FwdPacketLengthMax</td>
<td>FlowIATMean</td>
<td>InitWinBytesForward</td>
</tr>
<tr>
<td>FwdPacketLengthMean</td>
<td>FlowIATMax</td>
<td></td>
</tr>
<tr>
<td>FwdPacketLengthStd</td>
<td>InitWinBytesForward</td>
<td></td>
</tr>
<tr>
<td>FlowPacketsPerS</td>
<td></td>
<td></td>
</tr>
<tr>
<td>FlowIATMean</td>
<td></td>
<td></td>
</tr>
<tr>
<td>FlowIATMax</td>
<td></td>
<td></td>
</tr>
<tr>
<td>PacketLengthMean</td>
<td></td>
<td></td>
</tr>
<tr>
<td>PacketLengthStd</td>
<td></td>
<td></td>
</tr>
<tr>
<td>PacketLengthVariance</td>
<td></td>
<td></td>
</tr>
<tr>
<td>AveragePacketSize</td>
<td></td>
<td></td>
</tr>
<tr>
<td>AvgFwdSegmentSize</td>
<td></td>
<td></td>
</tr>
<tr>
<td>SubflowFwdBytes</td>
<td></td>
<td></td>
</tr>
<tr>
<td>InitWinBytesForward</td>
<td></td>
<td></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As a measure of the discrimination capability of the chosen set of features, we employed <bold>Permutation Importance</bold>, a model-agnostic interpretability technique that estimates the importance of each feature by measuring how predictive accuracy decreases when we randomly perturb individual features. This method ensures that the importance scores of the features are not affected by the internal weighting scheme of a given model; hence, we obtain a fair and unbiased evaluation. After feature selection, the final features and their permutation importance scores are listed in <xref ref-type="table" rid="table-6">Table 6</xref>. It indicates that <bold>TimeStamp</bold> was identified as the most significant feature (importance score: 0.6022), indicating the vital role played by temporal patterns in identifying malicious activity. This finding aligns with previous studies that highlight the importance of timing anomalies and burst patterns as key features of ransomware activity. Network-layer features such as <bold>SourceIP</bold> and <bold>DestinationIP</bold> also scored highly, once more emphasizing the importance of IP-level traffic patterns in discriminating between benign and malicious flows. Additionally, temporal features derived from flows like <bold>FlowIATMean</bold>, <bold>FlowIATMax</bold>, and <bold>InitWinBytesForward</bold> were significant contributors by capturing inter-arrival time aspects and window-based flow behavior. Although the features <bold>SourcePort</bold> and <bold>FlowPacketsPerS</bold> had lower individual importance scores, they are still valuable additions whose collective contribution enhances the model&#x2019;s discriminative ability. In conclusion, the selected feature subset optimizes the trade-off between dimensionality reduction and predictive performance preservation. 
By selecting the most informative features, the generated models are more effective, less prone to overfitting, and better at generalizing across new ransomware strains. This reduced feature set served as a basis for subsequent ensemble and AutoML-based classification trials.</p>
<table-wrap id="table-6">
<label>Table 6</label>
<caption>
<title>Final set of selected features and their permutation importance scores</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Feature</th>
<th>Importance score</th>
</tr>
</thead>
<tbody>
<tr>
<td>TimeStamp</td>
<td>0.602158</td>
</tr>
<tr>
<td>SourceIP</td>
<td>0.509532</td>
</tr>
<tr>
<td>DestinationIP</td>
<td>0.071642</td>
</tr>
<tr>
<td>InitWinBytesForward</td>
<td>0.005049</td>
</tr>
<tr>
<td>SourcePort</td>
<td>0.002935</td>
</tr>
<tr>
<td>FlowIATMean</td>
<td>0.002203</td>
</tr>
<tr>
<td>FlowIATMax</td>
<td>0.001763</td>
</tr>
<tr>
<td>FlowPacketsPerS</td>
<td>0.000073</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Ensemble-Based Approaches</title>
<p>Ensemble learning is especially useful in the scenario of Android ransomware detection, primarily because it averts overfitting, which is the issue with most high-dimensional and imbalanced datasets. By aggregating several base learners, ensemble approaches make predictions more stable, decrease variance, and prevent one single model from memorizing noise or artifacts from minority classes. This feature is vital in security datasets, where generalization to unknown families of ransomware needs to be made [<xref ref-type="bibr" rid="ref-14">14</xref>,<xref ref-type="bibr" rid="ref-22">22</xref>]. This work compares some of the most popular ensemble methods as baselines. Random Forest-based bagging alleviates variance by bootstrap aggregation and majority voting. Boosting algorithms, such as Gradient Boosting, XGBoost, CatBoost, and LightGBM, iteratively train a sequence of weak learners to reduce residual errors and achieve state-of-the-art classification performance on tabular problems, including traffic analysis and malware classification. <bold>Extra Trees</bold>, a variant of Random Forest, adds extra randomization in choosing splits and further improves variance reduction with increased computational efficiency. Histogram-based boosting variants enhance scalability by feature binning, thereby accelerating training on extremely large datasets. Recent contributions like <bold>PerpetualBooster</bold> provide a hyperparameter-free alternative by adjusting boosting iterations and depth through a single budget parameter, effectively solving the tuning problem that exists within conventional ensembles. Combined, these models provide coverage of bagging, boosting, randomization-based, and parameter-free methods, creating a thorough baseline collection for measuring AutoML pipelines. This creates a robust performance benchmark and emphasizes the additional benefits of automation for enhancing scalability.</p>
<sec id="s3_5_1">
<label>3.5.1</label>
<title>Justification of Baseline Selection</title>
<p>The nine baseline models capture the key ensemble learning paradigms typically employed in network security:
<list list-type="bullet">
<list-item>
<p><bold>Bagging:</bold> Bagging and Random Forest reduce variance through averaging predictions across bootstrapped samples.</p></list-item>
<list-item>
<p><bold>Boosting:</bold> Gradient Boosting, HistGradientBoosting, XGBoost, LightGBM, and CatBoost work to fit sequentially to minimize residuals with high predictive capability.</p></list-item>
<list-item>
<p><bold>Randomization-Based:</bold> Extra Trees employ random selection of splits to enhance generalization and efficiency.</p></list-item>
<list-item>
<p><bold>Classical Baseline:</bold> AdaBoost is added as a baseline classical boosting reference, even though it is prone to overfitting on highly unbalanced datasets.</p></list-item>
</list></p>
</sec>
<sec id="s3_5_2">
<label>3.5.2</label>
<title>Consistency of Experimental Design</title>
<p>To promote fairness, all baseline models were trained on a common stratified 80:20 train/test split and underwent identical preprocessing operations, that is, deletion of duplicates, scaling, label encoding, and hierarchical class grouping. This ensures that any observed performance differences reflect the models&#x2019; abilities rather than data leakage, distribution shifts, or differing preprocessing.</p>
</sec>
<sec id="s3_5_3">
<label>3.5.3</label>
<title>Hyperparameter Optimization</title>
<p>All baseline model hyperparameters were systematically optimized with randomized search and stratified 5-fold cross-validation to maximize macro-averaged F1-score. The best hyperparameters can be found in <xref ref-type="table" rid="table-7">Table 7</xref>. All other unmentioned parameters were left with the default values from the library for reproducibility.</p>
<table-wrap id="table-7">
<label>Table 7</label>
<caption>
<title>Optimized hyperparameter configuration for ensemble baselines</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Hyperparameter setting</th>
</tr>
</thead>
<tbody>
<tr>
<td>Random Forest</td>
<td>Configured with 200 decision trees; randomization controlled by seed <inline-formula id="ieqn-30"><mml:math id="mml-ieqn-30"><mml:mo>=</mml:mo><mml:mn>42</mml:mn></mml:math></inline-formula>.</td>
</tr>
<tr>
<td>Gradient Boosting</td>
<td>Uses 200 boosting stages with learning rate <inline-formula id="ieqn-31"><mml:math id="mml-ieqn-31"><mml:mo>=</mml:mo><mml:mn>0.1</mml:mn></mml:math></inline-formula>, 80% subsampling per stage, feature selection by <inline-formula id="ieqn-32"><mml:math id="mml-ieqn-32"><mml:msqrt><mml:mtext>features</mml:mtext></mml:msqrt></mml:math></inline-formula>, maximum tree depth <inline-formula id="ieqn-33"><mml:math id="mml-ieqn-33"><mml:mo>=</mml:mo><mml:mn>5</mml:mn></mml:math></inline-formula>, and a fixed random seed.</td>
</tr>
<tr>
<td>HistGradientBoosting</td>
<td>Trained with default histogram-based splitting, random seed <inline-formula id="ieqn-34"><mml:math id="mml-ieqn-34"><mml:mo>=</mml:mo><mml:mn>42</mml:mn></mml:math></inline-formula>.</td>
</tr>
<tr>
<td>LightGBM</td>
<td>200 boosting rounds, learning rate <inline-formula id="ieqn-35"><mml:math id="mml-ieqn-35"><mml:mo>=</mml:mo><mml:mn>0.1</mml:mn></mml:math></inline-formula>, depth limited to 5 levels, <inline-formula id="ieqn-36"><mml:math id="mml-ieqn-36"><mml:msqrt><mml:mtext>features</mml:mtext></mml:msqrt></mml:math></inline-formula> feature sampling, full parallel execution enabled, random seed <inline-formula id="ieqn-37"><mml:math id="mml-ieqn-37"><mml:mo>=</mml:mo><mml:mn>42</mml:mn></mml:math></inline-formula>, and silent training mode.</td>
</tr>
<tr>
<td>Bagging</td>
<td>Base estimator: decision tree classifier; 200 bootstrap samples, 90% sampling ratio, 80% feature subset per iteration, random seed <inline-formula id="ieqn-38"><mml:math id="mml-ieqn-38"><mml:mo>=</mml:mo><mml:mn>42</mml:mn></mml:math></inline-formula>.</td>
</tr>
<tr>
<td>Extra Trees</td>
<td>200 randomized trees with unlimited depth, minimum split size <inline-formula id="ieqn-39"><mml:math id="mml-ieqn-39"><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:math></inline-formula>, minimum leaf size <inline-formula id="ieqn-40"><mml:math id="mml-ieqn-40"><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, parallel execution across cores, random seed <inline-formula id="ieqn-41"><mml:math id="mml-ieqn-41"><mml:mo>=</mml:mo><mml:mn>42</mml:mn></mml:math></inline-formula>.</td>
</tr>
<tr>
<td>XGBoost</td>
<td>100 boosting iterations, learning rate <inline-formula id="ieqn-42"><mml:math id="mml-ieqn-42"><mml:mo>=</mml:mo><mml:mn>0.1</mml:mn></mml:math></inline-formula>, log-loss evaluation metric, label encoder disabled, and fixed random seed.</td>
</tr>
<tr>
<td>CatBoost</td>
<td>100 boosting steps, learning rate <inline-formula id="ieqn-43"><mml:math id="mml-ieqn-43"><mml:mo>=</mml:mo><mml:mn>0.1</mml:mn></mml:math></inline-formula>, tree depth <inline-formula id="ieqn-44"><mml:math id="mml-ieqn-44"><mml:mo>=</mml:mo><mml:mn>6</mml:mn></mml:math></inline-formula>, silent mode enabled, random seed <inline-formula id="ieqn-45"><mml:math id="mml-ieqn-45"><mml:mo>=</mml:mo><mml:mn>42</mml:mn></mml:math></inline-formula>.</td>
</tr>
<tr>
<td>AdaBoost</td>
<td>100 boosting iterations with learning rate <inline-formula id="ieqn-46"><mml:math id="mml-ieqn-46"><mml:mo>=</mml:mo><mml:mn>0.1</mml:mn></mml:math></inline-formula> and fixed random seed.</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>AutoML-Based Approaches</title>
<p>AutoML reduces human effort by automatically preprocessing features, selecting models, and adjusting hyperparameters, thereby enhancing scalability and reproducibility. AutoML is especially important in cybersecurity because the model needs to respond quickly to changing threats. We compared three AutoML frameworks&#x2014;<bold>FLAML</bold>, <bold>TPOT</bold>, and <bold>EvalML</bold>&#x2014;chosen for their contrasting design philosophies.
<list list-type="bullet">
<list-item>
<p><bold>FLAML:</bold> A computationally efficient, lightweight AutoML library that dynamically scales the time and computational resources to find near-optimal learners within constrained time budgets, making it ideal for repeated retraining in security scenarios [<xref ref-type="bibr" rid="ref-21">21</xref>].</p></list-item>
<list-item>
<p><bold>TPOT:</bold> Uses genetic programming to develop end-to-end machine learning pipelines such as preprocessing, model selection, and hyperparameter optimization. The use of GPU acceleration with NVIDIA RAPIDS/cuML greatly enhances exploration speed on large ransomware datasets [<xref ref-type="bibr" rid="ref-25">25</xref>].</p></list-item>
<list-item>
<p><bold>EvalML:</bold> Offers an interpretable and predictable AutoML process via Bayesian optimization, with built-in categorical data and class imbalance handling, and the ability to create deployment-ready models through automated hyperparameter tuning [<xref ref-type="bibr" rid="ref-26">26</xref>].</p></list-item>
</list></p>
<p>All experiments for AutoML were conducted using uniform preprocessing pipelines, stratified train-test splits, and a fixed random seed (random_state &#x003D; 42) for replicability. A summary of the training settings has been provided in <xref ref-type="table" rid="table-8">Table 8</xref>.</p>
<table-wrap id="table-8">
<label>Table 8</label>
<caption>
<title>AutoML training configurations</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Framework</th>
<th>Configuration</th>
</tr>
</thead>
<tbody>
<tr>
<td>FLAML</td>
<td>time_budget &#x003D; 5000, metric &#x003D; accuracy, task &#x003D; classification, early_stop &#x003D; True, seed &#x003D; 42</td>
</tr>
<tr>
<td>TPOT</td>
<td>max_time_mins &#x003D; 1000, n_jobs &#x003D; 5, use_GPU &#x003D; True (RAPIDS/cuML), scoring &#x003D; &#x2018;accuracy&#x2019;, random_state &#x003D; 42</td>
</tr>
<tr>
<td>EvalML</td>
<td><monospace>AutoMLSearch(X_train, y_train, problem_type &#x003D; &#x201C;multiclass&#x201D;, random_seed &#x003D; 42, optimize_for &#x003D; &#x201C;accuracy&#x201D;)</monospace></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_7">
<label>3.7</label>
<title>Evaluation Phase</title>
<p>Throughout the evaluation stage, the performance of the trained model was assessed using various standard metrics to confirm its effectiveness in detecting Android ransomware. This check was conducted using the test dataset, which was excluded from the training procedure to provide an unbiased evaluation of the model&#x2019;s capability to generalize. The performance metrics used in this study are detailed in <xref ref-type="table" rid="table-9">Table 9</xref>.</p>
<table-wrap id="table-9">
<label>Table 9</label>
<caption>
<title>Description of performance metrics used for evaluation</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Metric</th>
<th>Description</th>
<th>Formula</th>
</tr>
</thead>
<tbody>
<tr>
<td>Accuracy</td>
<td>Calculates the percentage of accurate predictions, including true positives and true negatives, out of all assessed cases.</td>
<td><inline-formula id="ieqn-47"><mml:math id="mml-ieqn-47"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:math></inline-formula></td>
</tr>
<tr>
<td>Precision</td>
<td>Fraction of predicted positive instances that were actually correct, reflecting the ability to avoid false positives.</td>
<td><inline-formula id="ieqn-48"><mml:math id="mml-ieqn-48"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:math></inline-formula></td>
</tr>
<tr>
<td>Recall</td>
<td>Ability of the model to correctly identify actual positive cases, important for capturing all malware or attack instances.</td>
<td><inline-formula id="ieqn-49"><mml:math id="mml-ieqn-49"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:math></inline-formula></td>
</tr>
<tr>
<td>F1-score</td>
<td>Harmonic mean of precision and recall, balancing both metrics, especially under class imbalance.</td>
<td><inline-formula id="ieqn-50"><mml:math id="mml-ieqn-50"><mml:mn>2</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mrow><mml:mtext>Precision</mml:mtext><mml:mo>&#x00D7;</mml:mo><mml:mtext>Recall</mml:mtext></mml:mrow><mml:mrow><mml:mtext>Precision</mml:mtext><mml:mo>+</mml:mo><mml:mtext>Recall</mml:mtext></mml:mrow></mml:mfrac></mml:mstyle></mml:math></inline-formula></td>
</tr>
<tr>
<td>Balanced Acc.</td>
<td>The average of recall scores across all classes, robust to class imbalance.</td>
<td><inline-formula id="ieqn-51"><mml:math id="mml-ieqn-51"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mn>1</mml:mn><mml:mi>C</mml:mi></mml:mfrac></mml:mstyle><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mtext>Recall</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula></td>
</tr>
<tr>
<td>Macro Avg.</td>
<td>Computes the metric independently for each class and then takes the unweighted mean, treating all classes equally.</td>
<td><inline-formula id="ieqn-52"><mml:math id="mml-ieqn-52"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mn>1</mml:mn><mml:mi>C</mml:mi></mml:mfrac></mml:mstyle><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mtext>Metric</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula></td>
</tr>
<tr>
<td>Macro AUC</td>
<td>The unweighted average of the Area Under the ROC Curve (AUC) calculated in a one-vs-rest manner for each class.</td>
<td><inline-formula id="ieqn-53"><mml:math id="mml-ieqn-53"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mn>1</mml:mn><mml:mi>C</mml:mi></mml:mfrac></mml:mstyle><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msubsup><mml:msubsup><mml:mo>&#x222B;</mml:mo><mml:mn>0</mml:mn><mml:mn>1</mml:mn></mml:msubsup><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:msubsup><mml:mi>R</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo stretchy="false">(</mml:mo><mml:mi>u</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi><mml:mi>u</mml:mi></mml:math></inline-formula></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The performance of the proposed framework was comprehensively evaluated using a stratified 80:20 train-test split, which preserved the original class balance. This ensured that ransomware and benign traffic were represented proportionally in the test and training sets. Additionally, some ransomware families were excluded from the training step to test the model&#x2019;s ability to generalize against novel threats, a crucial consideration for its real-world effectiveness.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Results and Analysis</title>
<p>This section presents the experimental outcomes of our Android ransomware detection system, comparing classical ensemble learning methods with AutoML-based ones. In addition to the final accuracy values, we include cross-validation results, learning curve analysis, and confusion matrix analysis to ensure that the performance is stable, unbiased, and not simply a result of overfitting or anomalies in the dataset. Our findings suggest that although ensemble baselines produce acceptable performance, AutoML systems, especially FLAML, consistently achieve higher accuracy, scalability, and efficiency, underscoring their pragmatic benefits in automating pipeline optimization.</p>
<p>Within our initial exploratory analysis, we employed hierarchical clustering to recluster ransomware families into behaviourally coherent groups. The Euclidean distance matrix, shown in <xref ref-type="fig" rid="fig-2">Fig. 2</xref>, captures pairwise similarities between centroids of classes, with darker color indicating closer distance. Interestingly, <italic>Pletor</italic> was seen as an outlier, always having high distances from other families, reflecting its distinctive behavior. The dendrogram in <xref ref-type="fig" rid="fig-3">Fig. 3</xref> indicates the hierarchical relationships between the families; classes that combine at lower distances are more similar. For instance, WannaLocker and Simplocker, and Koler and PornDroid, were close relatives, reflecting a high behavioral similarity between them. To identify meaningful clusters, we imposed a horizontal cutoff line at a distance threshold of 1, resulting in eight ransomware groups, as listed in <xref ref-type="table" rid="table-10">Table 10</xref>. In addition, <xref ref-type="table" rid="table-11">Table 11</xref> compares the initial class distributions with the regrouped distributions and shows a reduction in class complexity that may improve generalization.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>Pairwise Euclidean distances between class centroids</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-2.tif"/>
</fig><fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>Dendrogram of ransomware class centroids</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-3.tif"/>
</fig><table-wrap id="table-10">
<label>Table 10</label>
<caption>
<title>Ransomware class grouping</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Group name</th>
<th>Constituent ransomware classes</th>
</tr>
</thead>
<tbody>
<tr>
<td>WL_SL</td>
<td>WannaLocker &#x0026; Simplocker</td>
</tr>
<tr>
<td>PD_Kl</td>
<td>PornDroid &#x0026; Koler</td>
</tr>
<tr>
<td>JisLP</td>
<td>Jisut &#x0026; Lockerpin</td>
</tr>
<tr>
<td>Charger</td>
<td>Charger (individual group)</td>
</tr>
<tr>
<td>RansomBO</td>
<td>RansomBO (individual group)</td>
</tr>
<tr>
<td>SVpeng</td>
<td>SVpeng (individual group)</td>
</tr>
<tr>
<td>Pletor</td>
<td>Pletor (individual group)</td>
</tr>
<tr>
<td>Benign</td>
<td>Benign (non-malicious class)</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-11">
<label>Table 11</label>
<caption>
<title>Comparison of class distribution before and after category consolidation</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th colspan="2" align="center">(a) Pre-Consolidation</th>
<th colspan="2" align="center">(b) Post-Consolidation</th>
</tr>
<tr>
<th>Class</th>
<th>Instances</th>
<th>Grouped category</th>
<th>Instances</th>
</tr>
</thead>
<tbody>
<tr>
<td>Svpeng</td>
<td>54,161</td>
<td><inline-formula id="ieqn-54"><mml:math id="mml-ieqn-54"><mml:msup><mml:mi>PD_KI</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>90,637</td>
</tr>
<tr>
<td>PornDroid</td>
<td>46,082</td>
<td><inline-formula id="ieqn-55"><mml:math id="mml-ieqn-55"><mml:msup><mml:mi>WL_SL</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>69,041</td>
</tr>
<tr>
<td>Koler</td>
<td>44,555</td>
<td>Svpeng</td>
<td>54,161</td>
</tr>
<tr>
<td>Benign</td>
<td>43,091</td>
<td><inline-formula id="ieqn-56"><mml:math id="mml-ieqn-56"><mml:msup><mml:mi>JisLP</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>50,979</td>
</tr>
<tr>
<td>RansomBO</td>
<td>39,859</td>
<td>Benign</td>
<td>43,091</td>
</tr>
<tr>
<td>Charger</td>
<td>39,551</td>
<td>RansomBO</td>
<td>39,859</td>
</tr>
<tr>
<td>Simplocker</td>
<td>36,340</td>
<td>Charger</td>
<td>39,551</td>
</tr>
<tr>
<td>WannaLocker</td>
<td>32,701</td>
<td>Pletor</td>
<td>4715</td>
</tr>
<tr>
<td>Jisut</td>
<td>25,672</td>
<td></td>
<td></td>
</tr>
<tr>
<td>Lockerpin</td>
<td>25,307</td>
<td></td>
<td></td>
</tr>
<tr>
<td>Pletor</td>
<td>4715</td>
<td></td>
<td></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Note: <sup>1</sup>Combined PornDroid &#x002B; Koler instances, <sup>2</sup>Combined WannaLocker &#x002B; Simplocker instances, <sup>3</sup>Combined Jisut &#x002B; Lockerpin instances.</p>
</table-wrap-foot>
</table-wrap>
<sec id="s4_1">
<label>4.1</label>
<title>Ensemble Learning Approach</title>
<p>We have utilized and compared nine ensemble methods according to their capability to detect Android ransomware. All models were trained and tested using a stratified data split to create a class-balanced dataset. The metrics (accuracy, precision, recall, and F1-score) have been used to evaluate them. Comparison results are shown in <xref ref-type="table" rid="table-12">Table 12</xref>. In general, the ensemble algorithms performed better than standard DNNs and CNNs, with eight out of nine models achieving an accuracy of over 99%. More concretely, the best results were achieved with Bagging, Gradient Boosting and Random Forest; their accuracies and F1-scores are nearly 100%. These results highlight the high potential of these models in accurately distinguishing between normal and adversarial traffic.</p>
<table-wrap id="table-12">
<label>Table 12</label>
<caption>
<title>Performance comparison of classifiers on ransomware dataset with extended metrics</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Accuracy</th>
<th align="center">Balanced Acc.</th>
<th align="center">Macro precision</th>
<th>Macro Recall</th>
<th>Macro F1</th>
<th>Macro AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td><bold>Bagging</bold></td>
<td><bold>0.9984</bold></td>
<td><bold>0.9974</bold></td>
<td><bold>0.9973</bold></td>
<td><bold>0.9974</bold></td>
<td><bold>0.9974</bold></td>
<td><bold>1.0000</bold></td>
</tr>
<tr>
<td>Gradient Boosting</td>
<td>0.9981</td>
<td>0.9967</td>
<td>0.9971</td>
<td>0.9967</td>
<td>0.9969</td>
<td>1.0000</td>
</tr>
<tr>
<td>LightGBM</td>
<td>0.9961</td>
<td>0.9938</td>
<td>0.9946</td>
<td>0.9938</td>
<td>0.9942</td>
<td>1.0000</td>
</tr>
<tr>
<td>HistGradientBoosting</td>
<td>0.9960</td>
<td>0.9948</td>
<td>0.9946</td>
<td>0.9948</td>
<td>0.9947</td>
<td>1.0000</td>
</tr>
<tr>
<td>Random Forest</td>
<td>0.9953</td>
<td>0.9899</td>
<td>0.9936</td>
<td>0.9899</td>
<td>0.9917</td>
<td>1.0000</td>
</tr>
<tr>
<td>XGBoost</td>
<td>0.9943</td>
<td>0.9909</td>
<td>0.9928</td>
<td>0.9909</td>
<td>0.9918</td>
<td>1.0000</td>
</tr>
<tr>
<td>Extra Trees</td>
<td>0.9914</td>
<td>0.9853</td>
<td>0.9907</td>
<td>0.9853</td>
<td>0.9879</td>
<td>0.9999</td>
</tr>
<tr>
<td>CatBoost</td>
<td>0.9897</td>
<td>0.9843</td>
<td>0.9888</td>
<td>0.9843</td>
<td>0.9865</td>
<td>0.9999</td>
</tr>
<tr>
<td>AdaBoost</td>
<td>0.5755</td>
<td>0.4636</td>
<td>0.3999</td>
<td>0.4636</td>
<td>0.4127</td>
<td>0.9248</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The results presented in <xref ref-type="table" rid="table-13">Tables 13</xref>&#x2013;<xref ref-type="table" rid="table-21">21</xref> demonstrate that ensemble learning is an effective approach for detecting Android ransomware. The leading models consistently maintained exceptional performance, with a global accuracy &#x003E;99.4%, and macro/weighted F1-scores above 0.99 for every technique mentioned above (Random Forest, XGBoost, Bagging, Gradient Boosting and HistGradientBoosting). These models achieve not only strong performance on the majority classes, but also remarkably high precision and recall for minority families such as Pletor and Charger, which further demonstrates their capability in dealing with imbalanced datasets.</p>
<table-wrap id="table-13">
<label>Table 13</label>
<caption>
<title>Results of the CatBoost classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>0.9985</td>
<td>1.0000</td>
<td>0.9992</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9896</td>
<td>0.9765</td>
<td>0.9830</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9699</td>
<td>0.9949</td>
<td>0.9822</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9845</td>
<td>0.9856</td>
<td>0.9850</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9790</td>
<td>0.9406</td>
<td>0.9594</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9927</td>
<td>0.9923</td>
<td>0.9925</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9974</td>
<td>0.9948</td>
<td>0.9961</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9989</td>
<td>0.9899</td>
<td>0.9944</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9897</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9888</td>
<td>0.9843</td>
<td>0.9865</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9897</td>
<td>0.9897</td>
<td>0.9897</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-14">
<label>Table 14</label>
<caption>
<title>Results of the XGBoost classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>0.9999</td>
<td>0.9992</td>
<td>0.9995</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9908</td>
<td>0.9933</td>
<td>0.9920</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9881</td>
<td>0.9886</td>
<td>0.9884</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9917</td>
<td>0.9933</td>
<td>0.9925</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9795</td>
<td>0.9629</td>
<td>0.9711</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9957</td>
<td>0.9959</td>
<td>0.9958</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9990</td>
<td>0.9989</td>
<td>0.9989</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9974</td>
<td>0.9951</td>
<td>0.9963</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9943</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9928</td>
<td>0.9909</td>
<td>0.9918</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9943</td>
<td>0.9943</td>
<td>0.9943</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-15">
<label>Table 15</label>
<caption>
<title>Results of the random forest classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>1.0000</td>
<td>1.0000</td>
<td>1.0000</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9967</td>
<td>0.9925</td>
<td>0.9946</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9926</td>
<td>0.9903</td>
<td>0.9915</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9935</td>
<td>0.9961</td>
<td>0.9948</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9768</td>
<td>0.9374</td>
<td>0.9567</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9947</td>
<td>0.9977</td>
<td>0.9962</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9966</td>
<td>0.9987</td>
<td>0.9976</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9967</td>
<td>0.9967</td>
<td>0.9967</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9954</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9935</td>
<td>0.9887</td>
<td>0.9910</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9953</td>
<td>0.9954</td>
<td>0.9953</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-16">
<label>Table 16</label>
<caption>
<title>Results of the extra trees classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>0.9994</td>
<td>0.9983</td>
<td>0.9988</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9914</td>
<td>0.9804</td>
<td>0.9859</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9796</td>
<td>0.9868</td>
<td>0.9832</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9873</td>
<td>0.9892</td>
<td>0.9883</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9866</td>
<td>0.9385</td>
<td>0.9620</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9926</td>
<td>0.9962</td>
<td>0.9944</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9957</td>
<td>0.9993</td>
<td>0.9975</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9970</td>
<td>0.9946</td>
<td>0.9958</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9914</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9912</td>
<td>0.9854</td>
<td>0.9882</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9915</td>
<td>0.9914</td>
<td>0.9914</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-17">
<label>Table 17</label>
<caption>
<title>Results of the HistGradientBoosting classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>0.9991</td>
<td>0.9988</td>
<td>0.9990</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9956</td>
<td>0.9942</td>
<td>0.9949</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9923</td>
<td>0.9916</td>
<td>0.9920</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9938</td>
<td>0.9955</td>
<td>0.9947</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9820</td>
<td>0.9852</td>
<td>0.9836</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9974</td>
<td>0.9974</td>
<td>0.9974</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9994</td>
<td>0.9994</td>
<td>0.9994</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9974</td>
<td>0.9965</td>
<td>0.9969</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9960</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9946</td>
<td>0.9948</td>
<td>0.9947</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9960</td>
<td>0.9960</td>
<td>0.9960</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-18">
<label>Table 18</label>
<caption>
<title>Results of the gradient boosting classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>1.0000</td>
<td>1.0000</td>
<td>1.0000</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9976</td>
<td>0.9973</td>
<td>0.9975</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9964</td>
<td>0.9983</td>
<td>0.9974</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9983</td>
<td>0.9987</td>
<td>0.9985</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9883</td>
<td>0.9852</td>
<td>0.9867</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9981</td>
<td>0.9979</td>
<td>0.9980</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9988</td>
<td>0.9994</td>
<td>0.9991</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9981</td>
<td>0.9962</td>
<td>0.9972</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9981</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9970</td>
<td>0.9966</td>
<td>0.9968</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9981</td>
<td>0.9981</td>
<td>0.9981</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-19">
<label>Table 19</label>
<caption>
<title>Results of the LightGBM classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>0.9990</td>
<td>0.9998</td>
<td>0.9994</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9970</td>
<td>0.9937</td>
<td>0.9953</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9907</td>
<td>0.9936</td>
<td>0.9922</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9947</td>
<td>0.9951</td>
<td>0.9949</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9818</td>
<td>0.9745</td>
<td>0.9782</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9966</td>
<td>0.9977</td>
<td>0.9972</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9990</td>
<td>0.9995</td>
<td>0.9993</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9983</td>
<td>0.9963</td>
<td>0.9973</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9961</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9946</td>
<td>0.9938</td>
<td>0.9942</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9961</td>
<td>0.9961</td>
<td>0.9961</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-20">
<label>Table 20</label>
<caption>
<title>Results of the AdaBoost classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>0.0000</td>
<td>0.0000</td>
<td>0.0000</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.4345</td>
<td>0.8834</td>
<td>0.5825</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.3677</td>
<td>0.8510</td>
<td>0.5136</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.0000</td>
<td>0.0000</td>
<td>0.0000</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.0000</td>
<td>0.0000</td>
<td>0.0000</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.8689</td>
<td>0.8857</td>
<td>0.8773</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.7157</td>
<td>0.9955</td>
<td>0.8327</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.7405</td>
<td>0.8331</td>
<td>0.7841</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.5741</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.3909</td>
<td>0.5561</td>
<td>0.4488</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.4093</td>
<td>0.5741</td>
<td>0.4679</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-21">
<label>Table 21</label>
<caption>
<title>Classification report of the bagging classifier</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Class</th>
<th>Precision</th>
<th>Recall</th>
<th>F1-Score</th>
<th>Support</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>Benign</td>
<td>1.0000</td>
<td>1.0000</td>
<td>1.0000</td>
<td>8618</td>
</tr>
<tr>
<td>1</td>
<td>Charger</td>
<td>0.9997</td>
<td>0.9980</td>
<td>0.9988</td>
<td>7910</td>
</tr>
<tr>
<td>2</td>
<td>JisLP</td>
<td>0.9984</td>
<td>0.9987</td>
<td>0.9985</td>
<td>10,196</td>
</tr>
<tr>
<td>3</td>
<td>PD_Kl</td>
<td>0.9986</td>
<td>0.9990</td>
<td>0.9988</td>
<td>18,128</td>
</tr>
<tr>
<td>4</td>
<td>Pletor</td>
<td>0.9946</td>
<td>0.9926</td>
<td>0.9936</td>
<td>943</td>
</tr>
<tr>
<td>5</td>
<td>RansomBO</td>
<td>0.9989</td>
<td>0.9987</td>
<td>0.9988</td>
<td>7972</td>
</tr>
<tr>
<td>6</td>
<td>SVpeng</td>
<td>0.9992</td>
<td>0.9996</td>
<td>0.9994</td>
<td>10,832</td>
</tr>
<tr>
<td>7</td>
<td>WL_SL</td>
<td>0.9984</td>
<td>0.9973</td>
<td>0.9978</td>
<td>13,808</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Accuracy:</bold></td>
<td colspan="4" align="center">0.9984</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Macro Avg:</bold></td>
<td>0.9985</td>
<td>0.9980</td>
<td>0.9982</td>
<td>78,407</td>
</tr>
<tr>
<td colspan="2" align="center"><bold>Weighted Avg:</bold></td>
<td>0.9984</td>
<td>0.9984</td>
<td>0.9984</td>
<td>78,407</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>On the other hand, Extra Trees, CatBoost and LightGBM performed quite well (98.9%&#x2013;99.6% accuracies), but were somewhat less consistent than Random Forest in detecting the minority classes. In contrast, AdaBoost&#x2019;s performance was severely degraded, with overall accuracy falling to 57.4% and poor class-wise F1-scores, indicating weak generalization ability in multi-class ransomware detection. Overall, we conclude that Bagging, Gradient Boosting, HistGradientBoosting, Random Forest and XGBoost are the most recommendable ensemble algorithms, and they serve as strong baselines against which more complex AutoML-based or hybrid models can be compared.</p>
<p>To ensure the reliability of our conclusions, as well as to mitigate a potential performance bias introduced by a single train-test split, we conducted 5-fold stratified cross-validation. The results in <xref ref-type="table" rid="table-22">Table 22</xref> show low standard deviations under all settings, which confirms that the models maintain similar performance across separate folds of the dataset.</p>
<table-wrap id="table-22">
<label>Table 22</label>
<caption>
<title>5-Fold cross-validation results (mean <inline-formula id="ieqn-60"><mml:math id="mml-ieqn-60"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> std) for traditional ensemble models</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Accuracy</th>
<th>Balanced Acc.</th>
<th>Macro precision</th>
<th>Macro recall</th>
<th>Macro F1</th>
</tr>
</thead>
<tbody>
<tr>
<td>Random Forest</td>
<td>0.9946 <inline-formula id="ieqn-61"><mml:math id="mml-ieqn-61"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0003</td>
<td>0.9889 <inline-formula id="ieqn-62"><mml:math id="mml-ieqn-62"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0007</td>
<td>0.9935 <inline-formula id="ieqn-63"><mml:math id="mml-ieqn-63"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0003</td>
<td>0.9889 <inline-formula id="ieqn-64"><mml:math id="mml-ieqn-64"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0007</td>
<td>0.9911 <inline-formula id="ieqn-65"><mml:math id="mml-ieqn-65"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0003</td>
</tr>
<tr>
<td>Gradient Boosting</td>
<td>0.9982 <inline-formula id="ieqn-66"><mml:math id="mml-ieqn-66"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0002</td>
<td>0.9971 <inline-formula id="ieqn-67"><mml:math id="mml-ieqn-67"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0007</td>
<td>0.9971 <inline-formula id="ieqn-68"><mml:math id="mml-ieqn-68"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0006</td>
<td>0.9971 <inline-formula id="ieqn-69"><mml:math id="mml-ieqn-69"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0007</td>
<td>0.9971 <inline-formula id="ieqn-70"><mml:math id="mml-ieqn-70"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0006</td>
</tr>
<tr>
<td>HistGradientBoosting</td>
<td>0.9955 <inline-formula id="ieqn-71"><mml:math id="mml-ieqn-71"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0004</td>
<td>0.9937 <inline-formula id="ieqn-72"><mml:math id="mml-ieqn-72"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0009</td>
<td>0.9932 <inline-formula id="ieqn-73"><mml:math id="mml-ieqn-73"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0007</td>
<td>0.9937 <inline-formula id="ieqn-74"><mml:math id="mml-ieqn-74"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0009</td>
<td>0.9935 <inline-formula id="ieqn-75"><mml:math id="mml-ieqn-75"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0008</td>
</tr>
<tr>
<td>LightGBM</td>
<td>0.9963 <inline-formula id="ieqn-76"><mml:math id="mml-ieqn-76"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0001</td>
<td>0.9944 <inline-formula id="ieqn-77"><mml:math id="mml-ieqn-77"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0005</td>
<td>0.9951 <inline-formula id="ieqn-78"><mml:math id="mml-ieqn-78"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0005</td>
<td>0.9944 <inline-formula id="ieqn-79"><mml:math id="mml-ieqn-79"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0005</td>
<td>0.9947 <inline-formula id="ieqn-80"><mml:math id="mml-ieqn-80"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0004</td>
</tr>
<tr>
<td>Bagging</td>
<td>0.9983 <inline-formula id="ieqn-81"><mml:math id="mml-ieqn-81"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0002</td>
<td>0.9974 <inline-formula id="ieqn-82"><mml:math id="mml-ieqn-82"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0006</td>
<td>0.9972 <inline-formula id="ieqn-83"><mml:math id="mml-ieqn-83"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0005</td>
<td>0.9974 <inline-formula id="ieqn-84"><mml:math id="mml-ieqn-84"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0006</td>
<td>0.9973 <inline-formula id="ieqn-85"><mml:math id="mml-ieqn-85"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0005</td>
</tr>
<tr>
<td>Extra Trees</td>
<td>0.9908 <inline-formula id="ieqn-86"><mml:math id="mml-ieqn-86"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0003</td>
<td>0.9851 <inline-formula id="ieqn-87"><mml:math id="mml-ieqn-87"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0010</td>
<td>0.9901 <inline-formula id="ieqn-88"><mml:math id="mml-ieqn-88"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0004</td>
<td>0.9851 <inline-formula id="ieqn-89"><mml:math id="mml-ieqn-89"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0010</td>
<td>0.9875 <inline-formula id="ieqn-90"><mml:math id="mml-ieqn-90"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0006</td>
</tr>
<tr>
<td>XGBoost</td>
<td>0.9947 <inline-formula id="ieqn-91"><mml:math id="mml-ieqn-91"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0002</td>
<td>0.9920 <inline-formula id="ieqn-92"><mml:math id="mml-ieqn-92"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0005</td>
<td>0.9933 <inline-formula id="ieqn-93"><mml:math id="mml-ieqn-93"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0004</td>
<td>0.9920 <inline-formula id="ieqn-94"><mml:math id="mml-ieqn-94"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0005</td>
<td>0.9927 <inline-formula id="ieqn-95"><mml:math id="mml-ieqn-95"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0002</td>
</tr>
<tr>
<td>CatBoost</td>
<td>0.9913 <inline-formula id="ieqn-96"><mml:math id="mml-ieqn-96"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0003</td>
<td>0.9871 <inline-formula id="ieqn-97"><mml:math id="mml-ieqn-97"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0030</td>
<td>0.9879 <inline-formula id="ieqn-98"><mml:math id="mml-ieqn-98"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0018</td>
<td>0.9871 <inline-formula id="ieqn-99"><mml:math id="mml-ieqn-99"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0030</td>
<td>0.9875 <inline-formula id="ieqn-100"><mml:math id="mml-ieqn-100"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0022</td>
</tr>
<tr>
<td>AdaBoost</td>
<td>0.6012 <inline-formula id="ieqn-101"><mml:math id="mml-ieqn-101"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0450</td>
<td>0.4919 <inline-formula id="ieqn-102"><mml:math id="mml-ieqn-102"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0431</td>
<td>0.4490 <inline-formula id="ieqn-103"><mml:math id="mml-ieqn-103"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0475</td>
<td>0.4919 <inline-formula id="ieqn-104"><mml:math id="mml-ieqn-104"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0431</td>
<td>0.4521 <inline-formula id="ieqn-105"><mml:math id="mml-ieqn-105"><mml:mo>&#x00B1;</mml:mo></mml:math></inline-formula> 0.0476</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>For a deeper understanding of the Bagging classifier&#x2019;s behavior, we also investigated its learning curve as shown in <xref ref-type="fig" rid="fig-4">Fig. 4</xref>. This curve displays accuracy on the training set and the cross-validation set at varying numbers of training samples. It can be observed that the cross-validation accuracy begins at a lower level and then gradually increases as more training data is presented, indicating that the model&#x2019;s overfitting is reducing and it is learning more generalizable patterns.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>Bagging learning curve plotting training vs. validation accuracy against increasing training sizes. Convergence shows there is no overfitting</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-4.tif"/>
</fig>
<p>The continued separation of the training and validation curves reflects a certain degree of variance&#x2014;the model is probably overfitting at this point. However, the validation curve is still increasing, indicating that more data benefits the model, and its generalization performance will improve. Robust cross-validation scores support that the models do generalize well. This robustness stems from the use of L1-regularized feature selection, which discards less relevant features, as well as from ensemble methods, which decrease variance by averaging and thereby mitigate overfitting. The Bagging classifier emerged as the best performer among all evaluated models. To further assess its performance, the confusion matrix in <xref ref-type="fig" rid="fig-5">Fig. 5</xref> confirms that, across all classes, there are only a few misclassifications among the predictions.</p>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>Confusion matrix for bagging classifier</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-5.tif"/>
</fig>
<p>The results further support the argument that the <bold>Bagging classifier</bold> is the best model for Android ransomware classification. Achieving a critically high accuracy of <bold>99.84%</bold>, it demonstrates ideal precision and recall for all ransomware families, showing the capacity to avoid false positives and negatives. Within the family of gradient boosting models, <bold>Gradient Boosting</bold> (GB) turned out to be better than today&#x2019;s options, including <bold>LightGBM, HistGradientBoosting, XGBoost, and CatBoost</bold>, due to its superior compatibility with multi-class ransomware specifics. The <bold>Random Forest classifier</bold> also performed well with an accuracy of <bold>99.54%</bold> and is thus a solid choice for this classification problem. While the <bold>Extra Trees</bold> classifier performed well with an accuracy of <bold>99.14%</bold>, it was not as strong as the best performing models. At the other end of the scale, <bold>AdaBoost</bold> did much worse than the competition with only <bold>57.41%</bold> accuracy, likely indicating that its boosting strategy is not well suited to the difficult multi-class task posed by Android ransomware data. Cross-validation and learning curve examination ensure the performance shown is trustworthy and not due to data bias/overfitting. However, the validation accuracy for smaller training sizes increases slowly in the learning curve, which means that the convergence speed of the Bagging classifier is limited and can be a bottleneck for large-scale or real-time applications. In contrast to ensembles consisting of manually designed models, AutoML-powered models, such as FLAML, can dynamically optimize hyperparameters and model selection, which often results in faster convergence and efficient learning that overcomes this limitation.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>AutoML Approaches</title>
<p>Three highly popular AutoML toolkits were analyzed for automating the choice of models/hyperparameters: <bold>EvalML</bold>, <bold>TPOT</bold> and <bold>FLAML</bold>. They avoid the extensive manual experimentation required to find an optimal pipeline by conducting a targeted search for near-optimal pipelines, making experimental efforts more affordable and competitive with or even better than exhaustive searches. Interpretability and handling of imbalanced data are also a focus of EvalML, in combination with internal preprocessing logic. The best pipeline found by EvalML (label encoding, missing-value imputation, undersampling and then a column-wise transformation) resulted in an XGBoost model. With these preprocessing steps in a pipeline, we obtained approximately <bold>99.38%</bold> accuracy with just about <bold>12.5 s</bold> of training. As shown in <xref ref-type="fig" rid="fig-6">Fig. 6</xref>, the component-wise structure of EvalML provides an excellent trade-off between interpretability and accuracy.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>EvalML best pipeline</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-6.tif"/>
</fig>
<p>TPOT uses a method called genetic programming to breed, develop and refine pipelines. Even with a limited configuration (only <bold>5 generations</bold> and <bold>20</bold> individuals), TPOT generated a best pipeline containing one or more pre-processing stages such as imputation, scaling, variance filtering, and feature union. The final model, which was built on an <bold>ExtraTreesClassifier</bold>, achieved an outstanding performance of <bold>99.49%</bold> with the overall training time being about <bold>2 h and 45 min</bold>. The resulting pipeline structure in <xref ref-type="fig" rid="fig-7">Fig. 7</xref> illustrates the flexibility of TPOT to build intricate and efficient architectures.</p>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>TPOT best pipeline</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-7.tif"/>
</fig>
<p>Conversely, FLAML focuses on fast and lightweight AutoML at a low computational cost. With a tight <bold>300-sec</bold> (5-min) budget, FLAML efficiently converged to an effective <bold>Random Forest Classifier</bold>, attaining the best-observed accuracy of <bold>99.85%</bold>. The final configuration used <bold>92 estimators</bold>, with tuned maximum features and maximum leaf nodes, and entropy-based splitting. The learning curve in <xref ref-type="fig" rid="fig-8">Fig. 8</xref> illustrates the efficiency and scalability of FLAML for quick turnaround experiments.</p>
<fig id="fig-8">
<label>Figure 8</label>
<caption>
<title>FLAML learning curve</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_72840-fig-8.tif"/>
</fig>
<p>From the comparative study of the above three AutoML frameworks (<xref ref-type="table" rid="table-23">Table 23</xref>), it is evident that there are clear trade-offs between accuracy and effort during training. FLAML selected the Random Forest classifier as the best model and achieved a top accuracy of 99.85% at around 38 min of training time. TPOT chose an Extra Trees classifier with a lower accuracy (99.49%) and a much higher training cost, taking almost 3 h to perform the evolutionary search during model selection. Compared to this, EvalML performed extremely well, selecting an XGBoost predictor with 99.38% accuracy and running in just 12.5 s. Despite its lower accuracy, this was by far the fastest result of any framework.</p>
<table-wrap id="table-23">
<label>Table 23</label>
<caption>
<title>Performance comparison of AutoML frameworks</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>AutoML framework</th>
<th>Best model</th>
<th>Accuracy (%)</th>
<th>Training time</th>
</tr>
</thead>
<tbody>
<tr>
<td>FLAML</td>
<td>RandomForestClassifier</td>
<td><bold>99.85</bold></td>
<td>37 min and 52 s</td>
</tr>
<tr>
<td>TPOT</td>
<td>ExtraTreesClassifier</td>
<td>99.49</td>
<td>2 h, 45 min, 16 s</td>
</tr>
<tr>
<td>EvalML</td>
<td>XGBoostClassifier</td>
<td>99.38</td>
<td>12.5 s</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Specific hyperparameter settings, detailed in <xref ref-type="table" rid="table-24">Table 24</xref>, can reveal how each framework fine-tunes the models it uses. FLAML optimized Random Forest with 92 estimators and entropy splitting as well as feature sampling control, while TPOT specialized Extra Trees to have constraints on split and leaf samples. The XGBoost configuration for EvalML used a max depth of 6, a learning rate (<monospace>eta</monospace>) of 0.1 with 100 estimators, and was supported by automated preprocessing steps including label encoding, imputation, undersampling as well as column selection. These findings highlight that, although FLAML returned the most accurate model overall, EvalML offered the best trade-off between accuracy and computation among the methods, which is very beneficial in situations requiring fast convergence.</p>
<table-wrap id="table-24">
<label>Table 24</label>
<caption>
<title>Optimal hyperparameters according to the models</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>AutoML</th>
<th>Best hyperparameter results</th>
</tr>
</thead>
<tbody>
<tr>
<td>FLAML</td>
<td><monospace>Best ML leaner: rf, Best hyperparameter config: { &#x2018;n_estimators&#x2019;: 92, &#x2018;max_features&#x2019;: 0.6193, &#x2018;max_leaves&#x2019;: 30482, &#x2018;criterion&#x2019;: &#x2018;entropy&#x2019; }</monospace></td>
</tr>
<tr>
<td>TPOT</td>
<td><monospace>ExtraTreesClassifier(max_features &#x003D; 0.8144, min_samples_leaf &#x003D; 17, min_samples_split &#x003D; 15, n_jobs &#x003D; 1)</monospace></td>
</tr>
<tr>
<td>EvalML</td>
<td><monospace>XGBoostClassifier(eta &#x003D; 0.1, max_depth &#x003D; 6, min_child_weight &#x003D; 1, n_estimators &#x003D; 100, n_jobs &#x003D; &#x2212;1, eval_metric &#x003D; &#x2018;logloss&#x2019;)</monospace></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Limitations</title>
<p>The experiments were performed in a laboratory setting with an offline dataset. The AutoML techniques used in the study are demanding as they need storage of previous iterations&#x2019; results, to reutilize previous computations for the following iteration. This phenomenon results in a conflict between memory (or CPU) and elapsed computation time. In certain situations, both resources get heavily taxed as the earlier trial parameter results have to be kept in memory for further operations. While some AutoML tools, including H2O (an automated machine learning platform), among others, offer model explainability features, this was not investigated in this paper. To ensure evaluations are fair and consistent, these explainability tools were intentionally excluded. Another drawback of this study is that several AutoML modules are maintained and updated regularly, leading to differences in their performance. Therefore, different results may be obtained due to other implementations in the future, and they may even improve upon what has been presented [<xref ref-type="bibr" rid="ref-27">27</xref>&#x2013;<xref ref-type="bibr" rid="ref-29">29</xref>].</p>
</sec>
<sec id="s6">
<label>6</label>
<title>State-of-the-Art-Comparison</title>
<p>To assess the efficiency of classical and automated approaches to Android ransomware detection, a comparative study was conducted that integrated findings from the current literature with the intended hybrid AutoML ensemble approach, as depicted in <xref ref-type="table" rid="table-25">Table 25</xref>. Classic ensemble methods, including Bagging, Gradient Boosting (GB), and Random Forest, performed remarkably well in terms of detection accuracy, with all models achieving an accuracy of over 99.75%. Interestingly, the Bagging model achieved the best performance at 99.84%, closely followed by GB at 99.83% and Random Forest at 99.75%. This reflects their ability to perform well under diverse ransomware behaviors. Other methods, such as LightGBM, HistGradientBoosting, XGBoost, and CatBoost, also reported high performances. Nonetheless, AdaBoost performed poorly, with an accuracy of 59.67%, likely due to its vulnerability to imbalanced and multiclass data distributions. Conversely, the emergence of Automated Machine Learning (AutoML) frameworks has transformed model building by reducing the need for extensive manual configuration and still achieving competitive or even better results. Among these, FLAML excelled by achieving a peak accuracy of 99.85% within a restricted time, proving to be both efficient and accurate. TPOT, which utilizes genetic programming to search for fully optimized pipelines, achieved an accuracy of 99.49%, albeit at the expense of a longer execution time. EvalML, which combines preprocessing and model tuning, achieved 99.38% accuracy through a far more efficient process. Alternative research approaches have also proposed architectures, such as DroidAutoML, a microservice-based framework that claims an improvement of up to 11% over more conventional tools like Drebin and MaMaDroid. 
AutoML-generated deep learning models have been successfully applied to large-scale malware datasets, such as SOREL-20M and EMBER-2018, achieving impressive detection performance in both static and online analysis settings. These observations demonstrate the potential of AutoML as a highly effective and scalable solution for detecting Android ransomware.</p>
<table-wrap id="table-25">
<label>Table 25</label>
<caption>
<title>Comparison of state-of-the-art android ransomware detection methods</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Reference</th>
<th>Year</th>
<th>Class.</th>
<th>Sel. Feat.</th>
<th>Dataset</th>
<th>Main model</th>
<th>Acc. (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td>Hossain et al. [<xref ref-type="bibr" rid="ref-14">14</xref>]</td>
<td>2024</td>
<td>Binary &#x0026; Multi</td>
<td>10</td>
<td>Kaggle Android Ransomware (203,556 samples)</td>
<td>Bagging Ensemble</td>
<td>100 (Bin.), 99.82 (Multi)</td>
</tr>
<tr>
<td>Ahmed et al. [<xref ref-type="bibr" rid="ref-17">17</xref>]</td>
<td>2024</td>
<td>Binary</td>
<td>19</td>
<td>Kaggle Android Ransomware (392,035 samples)</td>
<td>Decision Tree</td>
<td>97.24</td>
</tr>
<tr>
<td>Farhan [<xref ref-type="bibr" rid="ref-30">30</xref>]</td>
<td>2024</td>
<td>Binary</td>
<td>20</td>
<td>AndroZoo (benign), RansomProber (malicious)</td>
<td>FNN (Keras; 3 dense layers)</td>
<td>98.9</td>
</tr>
<tr>
<td>Sharma et al. [<xref ref-type="bibr" rid="ref-31">31</xref>]</td>
<td>2021</td>
<td>Binary</td>
<td>20</td>
<td>RansomProber (2721 ransomware) &#x002B; AndroZoo (2000 benign)</td>
<td>Random Forest</td>
<td>99.67</td>
</tr>
<tr>
<td><bold>This Work</bold></td>
<td>2025</td>
<td>Multiclass</td>
<td>8</td>
<td>Kaggle Android Ransomware (392,035 samples)</td>
<td>Bagging; FLAML (AutoML)</td>
<td><bold>99.84</bold>; <bold>99.85</bold></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s7">
<label>7</label>
<title>Conclusion and Future Work</title>
<p>This study presents the use of ensemble learning models and AutoML-powered pipelines in the context of Android ransomware detection. Using a three-stage hybrid feature selection method based on hierarchical class grouping, the proposed framework proved to be highly resilient. The Bagging and FLAML were identified as the best-performing classifiers. All the above processes, along with stratified train-test splits, cross-validation, learning curves, confusion matrices, and ROC analysis, were performed to ensure the results&#x2019; reliability and reproducibility. The results highlight the power of AutoML to reduce human effort, speed up pipeline building, and obtain competitively or even better accuracy compared to manually tuned ensemble baselines. These findings make AutoML a promising candidate for future intrusion detection systems, particularly in settings where rapid model adaptation is needed to mitigate the latest threats. Although the showcased framework demonstrates excellent performance when modeled in controlled laboratory environments, its industrial scalability and real-world usability have yet to be proven. Subsequent work ought to focus on deployment experiments on big scales in production-like network environments to test inference latency, throughput, resource usage, and integration overhead. Furthermore, longitudinal testing on streaming network traffic is recommended to assess the framework&#x2019;s robustness against concept drift and the dynamic nature of ransomware variants. Such findings will provide strong proof of concept for the framework&#x2019;s applicability to security operations centers (SOCs) and establish a compelling argument for its real-world deployment.</p>
</sec>
</body>
<back>
<ack>
<p>Not applicable.</p>
</ack>
<sec>
<title>Funding Statement</title>
<p>This work was supported through the Ongoing Research Funding Program (ORF-2025-498), King Saud University, Riyadh, Saudi Arabia.</p>
</sec>
<sec>
<title>Author Contributions</title>
<p>Kirubavathi Ganapathiyappan conceptualized the study, supervised the overall research framework, and coordinated the methodology and validation strategies. Chahana Ravikumar conducted the experimental implementation, including ensemble model training, AutoML pipeline integration, and performance evaluation. Raghul Alagunachimuthu Ranganayaki was responsible for data preprocessing, designing hybrid feature selection, and developing hierarchical clustering-based class regrouping techniques. Ayman Altameem provided methodological guidance, technical insights on experimental design, and a critical review of the manuscript. Ateeq Ur Rehman contributed to the optimization and analysis of AutoML models, as well as manuscript writing and critical review. Ahmad Almogren contributed to writing, reviewing, and editing, provided comparative benchmarking with state-of-the-art approaches, and administered the project, providing technical guidance throughout the study. All authors reviewed the results and approved the final version of the manuscript.</p>
</sec>
<sec sec-type="data-availability">
<title>Availability of Data and Materials</title>
<p>Data obtained from Kaggle can be found at <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/subhajournal/android-ransomware-detection">https://www.kaggle.com/datasets/subhajournal/android-ransomware-detection</ext-link> (accessed on 25 October 2025).</p>
</sec>
<sec>
<title>Ethics Approval</title>
<p>Not applicable.</p>
</sec>
<sec sec-type="COI-statement">
<title>Conflicts of Interest</title>
<p>The authors declare no conflicts of interest to report regarding the present study.</p>
</sec>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Almomani</surname> <given-names>I</given-names></string-name>, <string-name><surname>Qaddoura</surname> <given-names>R</given-names></string-name>, <string-name><surname>Habib</surname> <given-names>M</given-names></string-name>, <string-name><surname>Alsoghyer</surname> <given-names>S</given-names></string-name>, <string-name><surname>Al Khayer</surname> <given-names>A</given-names></string-name>, <string-name><surname>Aljarah</surname> <given-names>I</given-names></string-name>, <etal>et al.</etal></person-group> <article-title>Android ransomware detection based on a hybrid evolutionary approach in the context of highly imbalanced data</article-title>. <source>IEEE Access</source>. <year>2021</year>;<volume>9</volume>:<fpage>57674</fpage>&#x2013;<lpage>91</lpage>. doi:<pub-id pub-id-type="doi">10.1109/ACCESS.2021.3071450</pub-id>.</mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="book"><person-group person-group-type="author"><string-name><surname>Kirubavathi</surname> <given-names>G</given-names></string-name>, <string-name><surname>Varun Vijay</surname> <given-names>RN</given-names></string-name></person-group>. <chapter-title>Composition and adaptation of ensemble learning for Android malware detection</chapter-title>. In: <person-group person-group-type="editor"><string-name><surname>Roy</surname> <given-names>NR</given-names></string-name>, <string-name><surname>Singh</surname> <given-names>AP</given-names></string-name>, <string-name><surname>Kumar</surname> <given-names>P</given-names></string-name>, <string-name><surname>Kaul</surname> <given-names>A</given-names></string-name></person-group>, editors. <source>Cyber security and digital forensics. redcysec 2024. Lecture Notes in Networks and Systems</source>. <publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer</publisher-name>; <year>2025</year>. p. <fpage>445</fpage>&#x2013;<lpage>57</lpage>. doi:<pub-id pub-id-type="doi">10.1007/978-981-96-3284-8_35</pub-id>.</mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Kirubavathi</surname> <given-names>G</given-names></string-name>, <string-name><surname>Regis Anne</surname> <given-names>W</given-names></string-name>, <string-name><surname>Sridevi</surname> <given-names>UK</given-names></string-name></person-group>. <article-title>A recent review of ransomware attacks on healthcare industries</article-title>. <source>Int J Syst Assur Eng Manag</source>. <year>2024</year>;<volume>15</volume>:<fpage>5078</fpage>&#x2013;<lpage>96</lpage>. doi:<pub-id pub-id-type="doi">10.1007/s13198-024-02496-4</pub-id>.</mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Kirubavathi</surname> <given-names>G</given-names></string-name>, <string-name><surname>Anne</surname> <given-names>WR</given-names></string-name></person-group>. <article-title>Behavioral based detection of android ransomware using machine learning techniques</article-title>. <source>Int J Syst Assur Eng Manag</source>. <year>2024</year>;<volume>15</volume>:<fpage>4404</fpage>&#x2013;<lpage>25</lpage>. doi:<pub-id pub-id-type="doi">10.1007/s13198-024-02439-z</pub-id>.</mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Alraizza</surname> <given-names>A</given-names></string-name>, <string-name><surname>Algarni</surname> <given-names>A</given-names></string-name></person-group>. <article-title>Ransomware detection using machine learning: a survey</article-title>. <source>Big Data Cogn Comput</source>. <year>2023</year>;<volume>7</volume>(<issue>3</issue>):<fpage>143</fpage>. doi:<pub-id pub-id-type="doi">10.3390/bdcc7030143</pub-id>.</mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Hasan</surname> <given-names>MM</given-names></string-name>, <string-name><surname>Biswas</surname> <given-names>MSS</given-names></string-name>, <string-name><surname>Karim</surname> <given-names>MS</given-names></string-name>, <string-name><surname>Rahman</surname> <given-names>MKH</given-names></string-name>, <string-name><surname>Ahmed</surname> <given-names>MFU</given-names></string-name>, <string-name><surname>Shatabda</surname> <given-names>S</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Enhancing malware detection with feature selection and scaling techniques using machine learning models</article-title>. <source>Sci Rep</source>. <year>2025</year>;<volume>15</volume>(<issue>1</issue>):<fpage>93447</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-025-93447-x</pub-id>; <pub-id pub-id-type="pmid">40097688</pub-id></mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Adriansyah</surname> <given-names>R</given-names></string-name>, <string-name><surname>Sukarno</surname> <given-names>P</given-names></string-name>, <string-name><surname>Wardana</surname> <given-names>AA</given-names></string-name></person-group>. <article-title>Android malware detection using ensemble learning and feature selection with insights from SHAP explainable AI</article-title>. In: <conf-name>Proceedings of the 2024 International Conference on Soft Computing and Machine Intelligence (ISCMI); 2024 Nov 22&#x2013;23</conf-name>; <publisher-loc>Melbourne, VIC, Australia</publisher-loc>. p. <fpage>187</fpage>&#x2013;<lpage>92</lpage>. doi:<pub-id pub-id-type="doi">10.1109/ISCMI63661.2024.10851666</pub-id>.</mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Mahindru</surname> <given-names>A</given-names></string-name>, <string-name><surname>Arora</surname> <given-names>H</given-names></string-name>, <string-name><surname>Kumar</surname> <given-names>A</given-names></string-name>, <string-name><surname>Gupta</surname> <given-names>SK</given-names></string-name>, <string-name><surname>Mahajan</surname> <given-names>S</given-names></string-name>, <string-name><surname>Kadry</surname> <given-names>S</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>PermDroid: a framework developed using proposed feature selection approach and machine learning techniques for Android malware detection</article-title>. <source>Sci Rep</source>. <year>2024</year>;<volume>14</volume>(<issue>1</issue>):<fpage>10724</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-024-60982-y</pub-id>; <pub-id pub-id-type="pmid">38730228</pub-id></mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Alsubaei</surname> <given-names>FS</given-names></string-name>, <string-name><surname>Almazroi</surname> <given-names>AA</given-names></string-name>, <string-name><surname>Atwa</surname> <given-names>WS</given-names></string-name>, <string-name><surname>Almazroi</surname> <given-names>AA</given-names></string-name>, <string-name><surname>Ayub</surname> <given-names>N</given-names></string-name>, <string-name><surname>Jhanjhi</surname> <given-names>NZ</given-names></string-name></person-group>. <article-title>BERT ensemble based MBR framework for Android malware detection</article-title>. <source>Sci Rep</source>. <year>2025</year>;<volume>15</volume>:<fpage>14027</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-025-14027-1</pub-id>.</mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Hossain</surname> <given-names>MA</given-names></string-name>, <string-name><surname>Islam</surname> <given-names>MS</given-names></string-name></person-group>. <article-title>A novel hybrid feature selection and ensemble-based machine learning approach for botnet detection</article-title>. <source>Sci Rep</source>. <year>2023</year>;<volume>13</volume>(<issue>1</issue>):<fpage>21207</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-023-48230-1</pub-id>; <pub-id pub-id-type="pmid">38040793</pub-id></mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Mohanraj</surname> <given-names>A</given-names></string-name>, <string-name><surname>Sivasankari</surname> <given-names>K</given-names></string-name></person-group>. <article-title>Android traffic malware analysis and detection using ensemble classifier</article-title>. <source>Ain Shams Eng J</source>. <year>2024</year>;<volume>15</volume>(<issue>12</issue>):<fpage>103134</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.asej.2024.103134</pub-id>.</mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Alhogail</surname> <given-names>A</given-names></string-name>, <string-name><surname>Alharbi</surname> <given-names>RA</given-names></string-name></person-group>. <article-title>Effective ML-based Android malware detection and categorization</article-title>. <source>Electronics</source>. <year>2025</year>;<volume>14</volume>(<issue>8</issue>):<fpage>1486</fpage>. doi:<pub-id pub-id-type="doi">10.3390/electronics14081486</pub-id>.</mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Pai</surname> <given-names>V</given-names></string-name>, <string-name><surname>Pai</surname> <given-names>K</given-names></string-name>, <string-name><surname>Manjunatha</surname> <given-names>S</given-names></string-name>, <string-name><surname>Hirmeti</surname> <given-names>S</given-names></string-name>, <string-name><surname>Bhat</surname> <given-names>VV</given-names></string-name></person-group>. <article-title>Adaptive network anomaly detection using machine learning approaches</article-title>. <source>EURASIP J Inf Secur</source>. <year>2025</year>;<volume>2025</volume>(<issue>1</issue>):<fpage>29</fpage>. doi:<pub-id pub-id-type="doi">10.1186/s13635-025-00216-4</pub-id>.</mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Hossain</surname> <given-names>MA</given-names></string-name>, <string-name><surname>Hasan</surname> <given-names>T</given-names></string-name>, <string-name><surname>Ahmed</surname> <given-names>F</given-names></string-name>, <string-name><surname>Cheragee</surname> <given-names>SH</given-names></string-name>, <string-name><surname>Kanchan</surname> <given-names>MH</given-names></string-name>, <string-name><surname>Haque</surname> <given-names>MA</given-names></string-name></person-group>. <article-title>Towards superior Android ransomware detection: an ensemble machine learning perspective</article-title>. <source>Cybersecur Applicat</source>. <year>2025</year>;<volume>3</volume>:<fpage>100076</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.csa.2024.100076</pub-id>.</mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Nasser</surname> <given-names>AR</given-names></string-name>, <string-name><surname>Hasan</surname> <given-names>AM</given-names></string-name>, <string-name><surname>Humaidi</surname> <given-names>AJ</given-names></string-name></person-group>. <article-title>DL-AMDet: deep learning-based malware detector for Android</article-title>. <source>Intell Syst Appl</source>. <year>2024</year>;<volume>21</volume>:<fpage>200318</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.iswa.2023.200318</pub-id>.</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Alamro</surname> <given-names>H</given-names></string-name>, <string-name><surname>Mtouaa</surname> <given-names>W</given-names></string-name>, <string-name><surname>Aljameel</surname> <given-names>S</given-names></string-name>, <string-name><surname>Salama</surname> <given-names>AS</given-names></string-name>, <string-name><surname>Hamza</surname> <given-names>MA</given-names></string-name>, <string-name><surname>Othman</surname> <given-names>AY</given-names></string-name></person-group>. <article-title>Automated Android malware detection using optimal ensemble learning approach for cybersecurity</article-title>. <source>IEEE Access</source>. <year>2023</year>;<volume>11</volume>:<fpage>72509</fpage>&#x2013;<lpage>17</lpage>. doi:<pub-id pub-id-type="doi">10.1109/ACCESS.2023.3294263</pub-id>.</mixed-citation></ref>
<ref id="ref-17"><label>[17]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ahmed</surname> <given-names>AA</given-names></string-name>, <string-name><surname>Shaahid</surname> <given-names>A</given-names></string-name>, <string-name><surname>Alnasser</surname> <given-names>F</given-names></string-name>, <string-name><surname>Alfaddagh</surname> <given-names>S</given-names></string-name>, <string-name><surname>Binagag</surname> <given-names>S</given-names></string-name>, <string-name><surname>Alqahtani</surname> <given-names>D</given-names></string-name></person-group>. <article-title>Android ransomware detection using supervised machine learning techniques based on traffic analysis</article-title>. <source>Sensors</source>. <year>2024</year>;<volume>24</volume>(<issue>1</issue>):<fpage>189</fpage>. doi:<pub-id pub-id-type="doi">10.3390/s24010189</pub-id>; <pub-id pub-id-type="pmid">38203051</pub-id></mixed-citation></ref>
<ref id="ref-18"><label>[18]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Khan</surname> <given-names>I</given-names></string-name>, <string-name><surname>Din</surname> <given-names>F</given-names></string-name>, <string-name><surname>Khan</surname> <given-names>F</given-names></string-name>, <string-name><surname>Saqib</surname> <given-names>S</given-names></string-name>, <string-name><surname>Ullah</surname> <given-names>S</given-names></string-name>, <string-name><surname>Haider</surname> <given-names>Z</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Recurrent neural network and multi-factor feature filtering for ransomware detection in Android apps</article-title>. <source>Int J Innovat Sci Technol</source>. <year>2024</year>;<volume>6</volume>:<fpage>1021</fpage>&#x2013;<lpage>30</lpage>.</mixed-citation></ref>
<ref id="ref-19"><label>[19]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ali</surname> <given-names>M</given-names></string-name>, <string-name><surname>Shiaeles</surname> <given-names>S</given-names></string-name>, <string-name><surname>Bendiab</surname> <given-names>G</given-names></string-name>, <string-name><surname>Ghita</surname> <given-names>B</given-names></string-name></person-group>. <article-title>MALGRA: machine learning and N-gram malware feature extraction and detection system</article-title>. <source>Electronics</source>. <year>2020</year>;<volume>9</volume>(<issue>11</issue>):<fpage>1777</fpage>. doi:<pub-id pub-id-type="doi">10.3390/electronics9111777</pub-id>.</mixed-citation></ref>
<ref id="ref-20"><label>[20]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Brown</surname> <given-names>A</given-names></string-name>, <string-name><surname>Gupta</surname> <given-names>M</given-names></string-name>, <string-name><surname>Abdelsalam</surname> <given-names>M</given-names></string-name></person-group>. <article-title>Automated machine learning for deep learning-based malware detection</article-title>. <comment>arXiv:2303.01679. 2023</comment>.</mixed-citation></ref>
<ref id="ref-21"><label>[21]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Bromberg</surname> <given-names>YD</given-names></string-name>, <string-name><surname>Gitzinger</surname> <given-names>L</given-names></string-name></person-group>. <article-title>DroidAutoML: a microservice architecture to automate the evaluation of Android machine learning detection systems</article-title>. In: <conf-name>Lecture Notes in Computer Science. IFIP International Conference on Distributed Applications and Interoperable Systems</conf-name>. <publisher-loc>Cham, Switzerland</publisher-loc>: <publisher-name>Springer</publisher-name>; <year>2020</year>. p. <fpage>148</fpage>&#x2013;<lpage>65</lpage>. doi:<pub-id pub-id-type="doi">10.1007/978-3-030-50323-9_10</pub-id>.</mixed-citation></ref>
<ref id="ref-22"><label>[22]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Masum</surname> <given-names>M</given-names></string-name>, <string-name><surname>Hossain Faruk</surname> <given-names>MJ</given-names></string-name>, <string-name><surname>Shahriar</surname> <given-names>H</given-names></string-name>, <string-name><surname>Qian</surname> <given-names>K</given-names></string-name>, <string-name><surname>Lo</surname> <given-names>D</given-names></string-name>, <string-name><surname>Adnan</surname> <given-names>MI</given-names></string-name></person-group>. <article-title>Ransomware classification and detection with machine learning algorithms</article-title>. In: <conf-name>Proceedings of the IEEE 12th Annual Computing and Communication Workshop and Conference (CCWC); 2022 Jan 26&#x2013;29</conf-name>; <publisher-loc>Las Vegas, NV, USA</publisher-loc>. p. <fpage>316</fpage>&#x2013;<lpage>22</lpage>. doi:<pub-id pub-id-type="doi">10.1109/CCWC54503.2022.9720869</pub-id>.</mixed-citation></ref>
<ref id="ref-23"><label>[23]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Khammas</surname> <given-names>BM</given-names></string-name></person-group>. <article-title>Ransomware detection using random forest technique</article-title>. <source>ICT Express</source>. <year>2020</year>;<volume>6</volume>(<issue>4</issue>):<fpage>325</fpage>&#x2013;<lpage>31</lpage>. doi:<pub-id pub-id-type="doi">10.1016/j.icte.2020.11.001</pub-id>.</mixed-citation></ref>
<ref id="ref-24"><label>[24]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Chakraborty</surname> <given-names>S</given-names></string-name></person-group>. <article-title>Android ransomware detection dataset</article-title>. <source>Kaggle</source>. <year>2023</year>. doi:<pub-id pub-id-type="doi">10.34740/KAGGLE/DSV/4987535</pub-id>.</mixed-citation></ref>
<ref id="ref-25"><label>[25]</label><mixed-citation publication-type="book"><person-group person-group-type="author"><string-name><surname>Gyimah</surname> <given-names>NK</given-names></string-name>, <string-name><surname>Akinie</surname> <given-names>R</given-names></string-name>, <string-name><surname>Mwakalonge</surname> <given-names>J</given-names></string-name>, <string-name><surname>Izison</surname> <given-names>B</given-names></string-name>, <string-name><surname>Mukwaya</surname> <given-names>A</given-names></string-name>, <string-name><surname>Ruganuza</surname> <given-names>D</given-names></string-name>, <etal>et al.</etal></person-group> <chapter-title>An AutoML-based approach for network intrusion detection</chapter-title>. In: <source>SoutheastCon 2025</source>. <publisher-loc>Piscataway, NJ, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2025</year>. p. <fpage>1177</fpage>&#x2013;<lpage>83</lpage>. doi:<pub-id pub-id-type="doi">10.1109/southeastcon56624.2025.10971461</pub-id>.</mixed-citation></ref>
<ref id="ref-26"><label>[26]</label><mixed-citation publication-type="book"><person-group person-group-type="author"><string-name><surname>Olson</surname> <given-names>R</given-names></string-name>, <string-name><surname>Moore</surname> <given-names>J</given-names></string-name></person-group>. <chapter-title>TPOT: a tree-based pipeline optimization tool for automating machine learning</chapter-title>. In: <source>Automated machine learning</source>. <publisher-loc>Cham, Switzerland</publisher-loc>: <publisher-name>Springer</publisher-name>; <year>2019</year>. p. <fpage>151</fpage>&#x2013;<lpage>60</lpage>. doi:<pub-id pub-id-type="doi">10.1007/978-3-030-05318-5_8</pub-id>.</mixed-citation></ref>
<ref id="ref-27"><label>[27]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Neto</surname> <given-names>EC</given-names></string-name>, <string-name><surname>Iqbal</surname> <given-names>S</given-names></string-name>, <string-name><surname>Buffett</surname> <given-names>S</given-names></string-name>, <string-name><surname>Sultana</surname> <given-names>M</given-names></string-name>, <string-name><surname>Taylor</surname> <given-names>A</given-names></string-name></person-group>. <article-title>Deep learning for intrusion detection in emerging technologies: a comprehensive survey and new perspectives</article-title>. <source>Artif Intell Rev</source>. <year>2025</year>;<volume>58</volume>(<issue>11</issue>):<fpage>340</fpage>. doi:<pub-id pub-id-type="doi">10.1007/s10462-025-11346-z</pub-id>.</mixed-citation></ref>
<ref id="ref-28"><label>[28]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Bhukya</surname> <given-names>R</given-names></string-name>, <string-name><surname>Moeed</surname> <given-names>SA</given-names></string-name>, <string-name><surname>Medavaka</surname> <given-names>A</given-names></string-name>, <string-name><surname>Khadidos</surname> <given-names>AO</given-names></string-name>, <string-name><surname>Khadidos</surname> <given-names>AO</given-names></string-name>, <string-name><surname>Selvarajan</surname> <given-names>S</given-names></string-name></person-group>. <article-title>SPARK and SAD: leading-edge deep learning frameworks for robust and effective intrusion detection in SCADA systems</article-title>. <source>Int J Crit Infrastruct Prot</source>. <year>2025</year>;<volume>49</volume>:<fpage>100759</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.ijcip.2025.100759</pub-id>.</mixed-citation></ref>
<ref id="ref-29"><label>[29]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Selvarajan</surname> <given-names>S</given-names></string-name>, <string-name><surname>Manoharan</surname> <given-names>H</given-names></string-name>, <string-name><surname>Abdelhaq</surname> <given-names>M</given-names></string-name>, <string-name><surname>Khadidos</surname> <given-names>AO</given-names></string-name>, <string-name><surname>Khadidos</surname> <given-names>A</given-names></string-name>, <string-name><surname>Alsaqour</surname> <given-names>R</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Diagnostic behavior analysis of profuse data intrusions in cyber physical systems using adversarial learning techniques</article-title>. <source>Sci Rep</source>. <year>2025</year>;<volume>15</volume>(<issue>1</issue>):<fpage>7287</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-025-91856-6</pub-id>; <pub-id pub-id-type="pmid">40025181</pub-id></mixed-citation></ref>
<ref id="ref-30"><label>[30]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Farhan</surname> <given-names>RI</given-names></string-name></person-group>. <article-title>An approach to Android ransomware detection using deep learning</article-title>. <source>Wasit J Pure Sci</source>. <year>2024</year>;<volume>3</volume>(<issue>1</issue>):<fpage>90</fpage>&#x2013;<lpage>4</lpage>. doi:<pub-id pub-id-type="doi">10.31185/wjps.325</pub-id>.</mixed-citation></ref>
<ref id="ref-31"><label>[31]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Sharma</surname> <given-names>S</given-names></string-name>, <string-name><surname>Challa</surname> <given-names>R</given-names></string-name>, <string-name><surname>Kumar</surname> <given-names>R</given-names></string-name></person-group>. <article-title>An ensemble-based supervised machine learning framework for Android ransomware detection</article-title>. <source>Int Arab J Inf Technol</source>. <year>2021</year>;<volume>18</volume>(<issue>3A</issue>):<fpage>422</fpage>&#x2013;<lpage>9</lpage>.</mixed-citation></ref>
</ref-list>
</back></article>




























