<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xml:lang="en" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMES</journal-id>
<journal-id journal-id-type="nlm-ta">CMES</journal-id>
<journal-id journal-id-type="publisher-id">CMES</journal-id>
<journal-title-group>
<journal-title>Computer Modeling in Engineering &#x0026; Sciences</journal-title>
</journal-title-group>
<issn pub-type="epub">1526-1506</issn>
<issn pub-type="ppub">1526-1492</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">73555</article-id>
<article-id pub-id-type="doi">10.32604/cmes.2025.073555</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Concrete Strength Prediction Using Machine Learning and Somersaulting Spider Optimizer</article-title>
<alt-title alt-title-type="left-running-head">Concrete Strength Prediction Using Machine Learning and Somersaulting Spider Optimizer</alt-title>
<alt-title alt-title-type="right-running-head">Concrete Strength Prediction Using Machine Learning and Somersaulting Spider Optimizer</alt-title>
</title-group>
<contrib-group>
<contrib id="author-1" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Eid</surname><given-names>Marwa M.</given-names></name><xref ref-type="aff" rid="aff-1">1</xref><xref ref-type="aff" rid="aff-2">2</xref><email>marwa.3eeed@gmail.com</email></contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western"><surname>Alhussan</surname><given-names>Amel Ali</given-names></name><xref ref-type="aff" rid="aff-3">3</xref></contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western"><surname>Mattar</surname><given-names>Ebrahim A.</given-names></name><xref ref-type="aff" rid="aff-4">4</xref></contrib>
<contrib id="author-4" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Khodadadi</surname><given-names>Nima</given-names></name><xref ref-type="aff" rid="aff-5">5</xref><email>nima.khodadadi@miami.edu</email></contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western"><surname>El-Kenawy</surname><given-names>El-Sayed M.</given-names></name><xref ref-type="aff" rid="aff-6">6</xref><xref ref-type="aff" rid="aff-7">7</xref></contrib>
<aff id="aff-1"><label>1</label><institution>Faculty of Artificial Intelligence, Delta University for Science and Technology</institution>, <addr-line>Mansoura, 11152</addr-line>, <country>Egypt</country></aff>
<aff id="aff-2"><label>2</label><institution>Jadara Research Center, Jadara University</institution>, <addr-line>Irbid, 21110</addr-line>, <country>Jordan</country></aff>
<aff id="aff-3"><label>3</label><institution>Department of Computer Sciences, College of Computer and Information Sciences, Princess Nourah bint Abdulrahman University, P.O. Box 84428</institution>, <addr-line>Riyadh, 11671</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-4"><label>4</label><institution>College of Engineering, University of Bahrain</institution>, <addr-line>Sakhir, P.O. Box 32038</addr-line>, <country>Kingdom of Bahrain</country></aff>
<aff id="aff-5"><label>5</label><institution>Department of Civil and Architectural Engineering, University of Miami</institution>, <addr-line>Coral Gables, FL 33146</addr-line>, <country>USA</country></aff>
<aff id="aff-6"><label>6</label><institution>Department of Communications and Electronics, Delta Higher Institute of Engineering and Technology</institution>, <addr-line>Mansoura, 35111</addr-line>, <country>Egypt</country></aff>
<aff id="aff-7"><label>7</label><institution>Applied Science Research Center, Applied Science Private University</institution>, <addr-line>Amman, 11931</addr-line>, <country>Jordan</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Authors: Marwa M. Eid. Email: <email>marwa.3eeed@gmail.com</email>; Nima Khodadadi. Email: <email>nima.khodadadi@miami.edu</email></corresp>
</author-notes>
<pub-date date-type="collection" publication-format="electronic">
<year>2026</year>
</pub-date>
<pub-date date-type="pub" publication-format="electronic">
<day>29</day><month>1</month><year>2026</year>
</pub-date>
<volume>146</volume>
<issue>1</issue>
<elocation-id>15</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>09</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 The Authors.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Published by Tech Science Press.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMES_73555.pdf"></self-uri>
<abstract>
<p>Accurate prediction of concrete compressive strength is fundamental for optimizing mix designs, improving material utilization, and ensuring structural safety in modern construction. Traditional empirical methods often fail to capture the non-linear relationships among concrete constituents, especially with the growing use of supplementary cementitious materials and recycled aggregates. This study presents an integrated machine learning framework for concrete strength prediction, combining advanced regression models&#x2014;namely CatBoost&#x2014;with metaheuristic optimization algorithms, with a particular focus on the Somersaulting Spider Optimizer (SSO). A comprehensive dataset encompassing diverse mix proportions and material types was used to evaluate baseline machine learning models, including CatBoost, XGBoost, ExtraTrees, and RandomForest. Among these, CatBoost demonstrated superior accuracy across multiple performance metrics. To further enhance predictive capability, several bio-inspired optimizers were employed for hyperparameter tuning. The SSO-CatBoost hybrid achieved the lowest mean squared error and highest correlation coefficients, outperforming other metaheuristic approaches such as Genetic Algorithm, Particle Swarm Optimization, and Grey Wolf Optimizer. Statistical significance was established through Analysis of Variance and Wilcoxon signed-rank testing, confirming the robustness of the optimized models. The proposed methodology not only delivers improved predictive performance but also offers a transparent framework for mix design optimization, supporting data-driven decision making in sustainable and resilient infrastructure development.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Concrete strength</kwd>
<kwd>machine learning</kwd>
<kwd>CatBoost</kwd>
<kwd>metaheuristic optimization</kwd>
<kwd>somersaulting spider optimizer</kwd>
<kwd>ensemble models</kwd>
</kwd-group>
<funding-group>
<award-group id="awg1">
<funding-source>Princess Nourah bint Abdulrahman University</funding-source>
<award-id>PNURSP2025R308</award-id>
</award-group>
</funding-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>Concrete compressive strength prediction represents a fundamental challenge in modern construction engineering, directly impacting structural safety, material optimization, and sustainable infrastructure development [<xref ref-type="bibr" rid="ref-1">1</xref>]. Accurate prediction of concrete strength enables engineers to optimize mix designs, reduce material waste, ensure structural reliability, and minimize construction costs while maintaining desired performance characteristics [<xref ref-type="bibr" rid="ref-2">2</xref>]. Traditional empirical methods for strength estimation, while historically valuable, often prove inadequate for contemporary concrete formulations that incorporate supplementary cementitious materials, chemical admixtures, and recycled aggregates [<xref ref-type="bibr" rid="ref-3">3</xref>], which exhibit complex non-linear relationships between constituent materials and resulting mechanical properties.</p>
<p>The increasing complexity of modern concrete technology necessitates advanced computational approaches capable of modeling intricate interactions between multiple mix design parameters [<xref ref-type="bibr" rid="ref-4">4</xref>]. Machine learning algorithms have emerged as powerful alternatives to conventional methods, offering superior capability to capture non-linear relationships and complex dependencies inherent in concrete strength development [<xref ref-type="bibr" rid="ref-5">5</xref>]. Gradient boosting algorithms, particularly CatBoost, have demonstrated exceptional performance in regression tasks involving heterogeneous datasets with mixed feature types [<xref ref-type="bibr" rid="ref-6">6</xref>], making them highly suitable for concrete strength prediction applications [<xref ref-type="bibr" rid="ref-7">7</xref>,<xref ref-type="bibr" rid="ref-8">8</xref>].</p>
<p>However, the effectiveness of machine learning models critically depends on optimal hyperparameter configuration, which significantly influences predictive accuracy and generalization capability [<xref ref-type="bibr" rid="ref-9">9</xref>]. Traditional hyperparameter optimization methods, such as grid search and random search, often prove computationally expensive and may fail to identify global optima in complex search spaces [<xref ref-type="bibr" rid="ref-10">10</xref>]. This limitation has motivated the development and application of metaheuristic optimization algorithms that can efficiently explore large parameter spaces while balancing exploration and exploitation mechanisms [<xref ref-type="bibr" rid="ref-11">11</xref>].</p>
<p>In hyperparameter tuning, the application of bio-inspired optimization algorithms has been of particular interest, as they are well suited to the multi-dimensional, non-convex problems that arise in high-dimensional hyperparameter search [<xref ref-type="bibr" rid="ref-12">12</xref>]. These algorithms draw inspiration from nature and biological processes to develop superior search strategies capable of identifying the best solutions in challenging landscapes [<xref ref-type="bibr" rid="ref-13">13</xref>]. The Somersaulting Spider Optimizer (SSO) is a new bio-inspired metaheuristic algorithm inspired by the locomotion behavior of a unique spider, <italic>Cebrennus rechenbergi</italic>; it combines high-energy exploration with low-energy exploitation to produce balanced search behavior for complex optimization problems.</p>
<p>Combining metaheuristic optimization with machine learning holds significant potential for improving the accuracy of concrete strength prediction and reducing prediction errors [<xref ref-type="bibr" rid="ref-14">14</xref>,<xref ref-type="bibr" rid="ref-15">15</xref>]. CatBoost is particularly well suited to concrete datasets because of its flexible handling of numerical data, high-quality automatic preprocessing, and built-in resistance to overfitting, while SSO contributes an effective search mechanism and adaptive energy management that together enable near-optimal hyperparameter tuning [<xref ref-type="bibr" rid="ref-16">16</xref>].</p>
<p>This study presents a comprehensive framework for concrete compressive strength prediction that combines the Somersaulting Spider Optimizer with CatBoost algorithm to achieve superior predictive performance. The research makes several significant contributions to the field of computational concrete engineering:</p>
<p><bold><italic>Primary Contributions</italic></bold>
<list list-type="simple">
<list-item><label>1.</label><p><bold>Novel Bio-Inspired Optimization Algorithm:</bold> Introduction and detailed mathematical formulation of the Somersaulting Spider Optimizer (SSO), a novel metaheuristic algorithm inspired by the unique locomotion behavior of <italic>Cebrennus rechenbergi</italic>. The algorithm incorporates adaptive energy management and dual-phase movement mechanisms (somersaulting for exploration and rolling for exploitation) specifically designed for hyperparameter optimization in machine learning applications.</p></list-item>
<list-item><label>2.</label><p><bold>Advanced Hyperparameter Optimization Framework:</bold> Development of an integrated SSO-CatBoost framework that achieves significant performance improvements over baseline models. The proposed approach demonstrates a 24.6 percent reduction in mean squared error compared to the standard CatBoost implementation, with the SSO-optimized model achieving MSE of 12.61 vs. 16.73 for the baseline model.</p></list-item>
<list-item><label>3.</label><p><bold>Comprehensive Comparative Analysis:</bold> Systematic evaluation and comparison of SSO against seven other metaheuristic optimization algorithms (Differential Evolution, Grey Wolf Optimizer, Whale Optimization Algorithm, Genetic Algorithm, Bat Algorithm, Harris Hawks Optimization, and Particle Swarm Optimization) applied to CatBoost hyperparameter tuning for concrete strength prediction.</p></list-item>
<list-item><label>4.</label><p><bold>Rigorous Statistical Validation:</bold> Implementation of comprehensive statistical analysis including Analysis of Variance (ANOVA) and Wilcoxon signed-rank testing to establish statistical significance of the proposed methodology, ensuring robust validation of performance improvements and algorithmic effectiveness.</p></list-item>
</list></p>
<p>The remainder of this paper is organized as follows: <xref ref-type="sec" rid="s2">Section 2</xref> presents a comprehensive literature review of machine learning approaches in concrete strength prediction, <xref ref-type="sec" rid="s3">Section 3</xref> details the materials and methods including dataset description, SSO algorithm formulation, and CatBoost implementation, <xref ref-type="sec" rid="s4">Section 4</xref> presents experimental results and comparative analysis, <xref ref-type="sec" rid="s5">Section 5</xref> provides discussion of findings and implications, and <xref ref-type="sec" rid="s6">Section 6</xref> concludes with key contributions and future research directions.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Literature Review</title>
<p>Ensemble learning techniques have also demonstrated particular promise in addressing the complexity of concrete strength prediction. A recent study illustrating this trend [<xref ref-type="bibr" rid="ref-17">17</xref>] replaced pure physical testing with the ISSA-BPNN-AdaBoost model, which proved superior to traditional physical testing approaches. The technique avoids the time and cost constraints that characterize conventional strength testing while maintaining a high degree of accuracy. The superior results obtained across different datasets further confirm the effectiveness of optimizing the base learners within ensemble structures.</p>
<p>Model interpretability has taken on a growing role as machine learning models are increasingly incorporated into critical engineering decisions. Notably, one recent study established an interpretable machine learning framework for high-performance concrete (HPC) compressive strength prediction [<xref ref-type="bibr" rid="ref-18">18</xref>], employing random forest, AdaBoost, XGBoost, and LightGBM models with hyperparameter optimization via cross-validation. SHAP analysis indicated that age, water-to-cement ratio, slag content, and water content were the most influential factors affecting predicted compressive strength, whereas superplasticizer ratios had a lower impact. This decision-support approach improves both the precision and the transparency of predictions, allowing engineers to understand the associations between mix constituents and strength properties.</p>
<p>Extensive dataset generation has driven many achievements in the field, and researchers recognize the importance of an effective database for model development and validation. For example, the ConcreteXAI dataset [<xref ref-type="bibr" rid="ref-19">19</xref>] is an important contribution: a ten-year laboratory study comprising 18,480 measurements across 12 different concrete formulations. This holistic dataset includes both mechanical and non-destructive tests, enabling sound development of predictive models while keeping decision-making processes transparent.</p>
<p>Research has also moved beyond ordinary concrete to consider more specialized formulations and applications reflecting the varied requirements of modern construction projects. As an illustration, one study [<xref ref-type="bibr" rid="ref-20">20</xref>] examining preplaced aggregate concrete (PAC) found that the complexity of injecting cementitious mixtures into pre-placed coarse aggregates complicated prediction because of production peculiarities and the material properties of the final product. Likewise, another study [<xref ref-type="bibr" rid="ref-21">21</xref>] examined hybrid-fiber-reinforced recycled aggregate concrete (HFRRAC), applying eleven machine learning models to capture the non-linearity among constituents. The results revealed that light gradient boosting machine algorithms were the most precise for this complex material system, achieving high R<sup>2</sup> values and low error measures.</p>
<p>High-performance concrete has received special attention in modern engineering practice because of its enhanced mechanical characteristics and wide range of applications in contemporary construction. According to a recent study [<xref ref-type="bibr" rid="ref-22">22</xref>], compressive strengths exceeding 200 MPa are achievable with HPC, while average 28-day strengths of 100&#x2013;120 MPa are already used in a variety of engineering projects. Predicting such high-strength materials requires advanced modeling methods capable of capturing the complex correlations between advanced admixtures and final properties.</p>
<p>The combination of optimization algorithms with machine learning has also become a powerful method for predicting concrete strength, uniting the pattern-recognition abilities of machine learning with the systematic search capacity of optimization algorithms. One example is a research study [<xref ref-type="bibr" rid="ref-23">23</xref>] that used a hybrid of random forest with particle swarm optimization (PSO) to predict the strength of concrete containing blast furnace slag and fly ash. The model achieved a correlation coefficient of 0.954 on the testing set, demonstrating strong generalization to sustainable forms of concrete.</p>
<p>Progress in hybrid methods continues, with researchers developing increasingly powerful computational models for concrete strength prediction. In particular, recent research has proposed an advanced hybrid algorithm that combines deep learning with reinforcement learning, using Convolutional Neural Network&#x2013;Long Short-Term Memory (CNN&#x2013;LSTM) based feature extraction and a Dueling Double Deep Q-Network agent that iteratively optimizes mix ratios [<xref ref-type="bibr" rid="ref-24">24</xref>]. Such methods point toward the future of computational intelligence in real-world design.</p>
<p>Concrete strength prediction research has also come to emphasize environmental sustainability, reflecting the construction industry&#x2019;s shift toward sustainable construction practices. One case in point is the recent introduction of the SmartMix Web3 model, which integrates ensemble machine learning with blockchain technology to strengthen low-carbon concrete design [<xref ref-type="bibr" rid="ref-25">25</xref>]. This novel approach demonstrates that modern computational methods can address environmental challenges alongside the technical ones.</p>
<p>Resolving data limitations is another important line of research, since not all concrete types and applications are covered by extensive datasets. Here, the framework offered by recent research [<xref ref-type="bibr" rid="ref-26">26</xref>], combining Model-Agnostic Meta-Learning (MAML) and Shapley Additive Explanations (SHAP), helps engineers achieve better predictive accuracy as well as improved interpretability in data-limited scenarios, with both methodologies serving as essential means of attaining higher predictive performance from fewer samples.</p>
<p><xref ref-type="table" rid="table-1">Table 1</xref> provides a systematic comparison of the various machine learning approaches discussed, highlighting the diversity of methodologies and their specific applications in concrete strength prediction.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Condensed comparative summary of ML-based concrete strength prediction studies</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>No.</th>
<th>Main focus</th>
<th>Methodology</th>
<th>Key findings</th>
<th>Research gaps &#x0026; limitations</th>
</tr>
</thead>
<tbody>
<tr>
<td>[<xref ref-type="bibr" rid="ref-17">17</xref>]</td>
<td>Ensemble prediction enhancement</td>
<td>ISSA-BPNN-AdaBoost</td>
<td>Higher accuracy than conventional testing; optimized base-learner integration</td>
<td>Limited ensemble comparison; interpretability unexplored</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-18">18</xref>]</td>
<td>Interpretable HPC prediction</td>
<td>RF, AdaBoost, XGBoost, LightGBM &#x002B; SHAP</td>
<td>Key influence of curing age and W/C ratio; improved visibility</td>
<td>No metaheuristic tuning; efficiency not assessed</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-19">19</xref>]</td>
<td>Extensive concrete dataset creation</td>
<td>ConcreteXAI (18,480 samples)</td>
<td>Broad mechanical &#x002B; NDT data for reliable model training</td>
<td>Generalization untested; no ML benchmarking</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-20">20</xref>]</td>
<td>PAC strength estimation</td>
<td>ML-based predictive modeling</td>
<td>Addresses PAC material complexity</td>
<td>Narrow material focus; no hybrid optimization</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-21">21</xref>]</td>
<td>HFRRAC prediction</td>
<td>11 ML models</td>
<td>LightGBM yielded best accuracy</td>
<td>CatBoost omitted; limited hyperparameter study</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-22">22</xref>]</td>
<td>HPC compressive strength analysis</td>
<td>Advanced modeling strategies</td>
<td>Strength <inline-formula id="ieqn-1"><mml:math id="mml-ieqn-1"><mml:mo>&#x2265;</mml:mo></mml:math></inline-formula> 200 MPa possible; complex mix interactions</td>
<td>No metaheuristic integration; dataset limited</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-23">23</xref>]</td>
<td>Sustainable concrete optimization</td>
<td>RF&#x2013;PSO hybrid</td>
<td>High generalization<break/>(R &#x003D; 0.954)</td>
<td>No comparison against advanced ensembles</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-24">24</xref>]</td>
<td>DL &#x002B; RL hybrid optimization</td>
<td>CNN&#x2013;LSTM &#x002B; Dueling DDQN</td>
<td>Improved mix optimization and feature learning</td>
<td>High complexity; limited comparative evaluation</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-25">25</xref>]</td>
<td>Low-carbon concrete modeling</td>
<td>SmartMix Web3 (ensemble &#x002B; blockchain)</td>
<td>Integrates ML with sustainability tracking</td>
<td>Blockchain-focused; insufficient performance metrics</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-26">26</xref>]</td>
<td>Data-constrained prediction</td>
<td>MAML &#x002B; SHAP</td>
<td>Better accuracy with limited samples</td>
<td>Meta-learning centric; no optimizer integration</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The comprehensive analysis presented in <xref ref-type="table" rid="table-1">Table 1</xref> reveals several critical research gaps that motivate the present study. First, despite the proven effectiveness of gradient boosting algorithms, CatBoost remains underexplored in concrete strength prediction compared to XGBoost and LightGBM, despite offering superior handling of categorical features, built-in regularization, and resistance to overfitting. Second, systematic comparison of CatBoost with diverse bio-inspired metaheuristic optimizers is absent from the literature, with most studies limiting investigations to single optimization algorithms or relying on computationally expensive grid search methods. Third, recent bio-inspired algorithms such as the Somersaulting Spider Optimizer (SSO), which demonstrates promising exploration-exploitation balance through adaptive energy management, have not been applied to concrete strength prediction. These gaps collectively justify the need for a comprehensive framework combining CatBoost&#x2019;s gradient boosting capabilities with SSO and other metaheuristic algorithms to achieve enhanced predictive accuracy, computational efficiency, and robust generalization across diverse concrete formulations.</p>

<p>The Somersaulting Spider Optimizer (SSO) represents a recent advancement in bio-inspired metaheuristics, employing dual locomotion modes&#x2014;high-energy somersaulting for global exploration and low-energy rolling for local exploitation&#x2014;regulated by adaptive energy management that balances search diversification and intensification. SSO has demonstrated competitive performance across benchmark optimization problems and has proven effective in ensemble learning applications, particularly for earthquake prediction where it achieved superior predictive accuracy and generalization compared to conventional metaheuristics [<xref ref-type="bibr" rid="ref-27">27</xref>]. Given SSO&#x2019;s proven capability in handling nonconvex, multimodal optimization landscapes and its successful application to machine learning model optimization, it presents a promising candidate for CatBoost hyperparameter tuning in concrete strength prediction, addressing the identified research gap regarding the integration of advanced bio-inspired algorithms with gradient boosting frameworks for enhanced predictive performance.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Materials and Methods</title>
<p>This study proposes a comprehensive machine learning framework for forecasting concrete compressive strength, built on an advanced gradient boosting model and a bio-inspired optimization tool. The three primary components of the methodology are data preparation and analysis supported by exploratory statistics, the Somersaulting Spider Optimizer (SSO) algorithm for hyperparameter search, and CatBoost as the predictive model. The experimental framework addresses the system of non-linear relationships between concrete mix design parameters and compressive strength by optimizing the machine learning model&#x2019;s hyperparameters. The proposed scheme applies the bio-inspired exploration&#x2013;exploitation principles of the SSO algorithm to identify the most appropriate CatBoost parameters and to optimize predictive performance across different concrete formulations. The integrated method provides a solid foundation for realistic estimation of concrete strength, supporting data-driven decision making in mix design optimization and quality control applications.</p>
<p>The proposed methodology follows a systematic three-phase workflow, as illustrated in <xref ref-type="fig" rid="fig-1">Fig. 1</xref>. The data preparation phase begins with the concrete dataset containing eight input features, followed by preprocessing and exploratory data analysis to identify patterns and correlations. In the model development phase, the dataset is partitioned into training (80%) and testing (20%) sets, and the CatBoost regression model is optimized using the Somersaulting Spider Optimizer (SSO) for hyperparameter tuning. The evaluation phase assesses predictive performance through multiple metrics including MSE, <inline-formula id="ieqn-2"><mml:math id="mml-ieqn-2"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, and MAE, validates statistical significance using ANOVA and Wilcoxon tests, and performs comparative analysis against eight alternative optimization algorithms to demonstrate the superiority of the SSO-based approach.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>Workflow of the proposed SSO-CatBoost framework for concrete strength prediction</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-1.tif"/>
</fig>
<sec id="s3_1">
<label>3.1</label>
<title>Dataset</title>
<p>The concrete compressive strength dataset used in this paper provides a holistic basis for evaluating machine learning algorithms that predict concrete strength from mix design parameters. The variety of concrete formulations represented in the dataset makes it well suited to comparing predictive models across a considerable diversity of mix formulations and curing conditions. <xref ref-type="table" rid="table-2">Table 2</xref> describes the dataset features in detail, with their units and typical ranges.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>Dataset feature description and statistical summary</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Feature</th>
<th>Description</th>
<th>Unit</th>
<th>Range</th>
</tr>
</thead>
<tbody>
<tr>
<td>Cement</td>
<td>Ordinary Portland cement content</td>
<td><inline-formula id="ieqn-7"><mml:math id="mml-ieqn-7"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>102.0&#x2013;540.0</td>
</tr>
<tr>
<td>Blast furnace slag</td>
<td>Ground granulated blast furnace slag</td>
<td><inline-formula id="ieqn-8"><mml:math id="mml-ieqn-8"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>0.0&#x2013;359.4</td>
</tr>
<tr>
<td>Fly ash</td>
<td>Class F fly ash content</td>
<td><inline-formula id="ieqn-9"><mml:math id="mml-ieqn-9"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>0.0&#x2013;200.1</td>
</tr>
<tr>
<td>Water</td>
<td>Total water content</td>
<td><inline-formula id="ieqn-10"><mml:math id="mml-ieqn-10"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>121.8&#x2013;247.0</td>
</tr>
<tr>
<td>Super-plasticizer</td>
<td>High-range water reducer</td>
<td><inline-formula id="ieqn-11"><mml:math id="mml-ieqn-11"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>0.0&#x2013;32.2</td>
</tr>
<tr>
<td>Coarse aggregate</td>
<td>Crushed stone aggregate</td>
<td><inline-formula id="ieqn-12"><mml:math id="mml-ieqn-12"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>801.0&#x2013;1145.0</td>
</tr>
<tr>
<td>Fine aggregate</td>
<td>Natural sand aggregate</td>
<td><inline-formula id="ieqn-13"><mml:math id="mml-ieqn-13"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></td>
<td>594.0&#x2013;992.6</td>
</tr>
<tr>
<td>Age</td>
<td>Concrete age at testing</td>
<td>days</td>
<td>1&#x2013;365</td>
</tr>
<tr>
<td align="center" colspan="4"><bold>Target variable</bold></td>
</tr>
<tr>
<td>Compressive strength</td>
<td>28-day equivalent strength</td>
<td>MPa</td>
<td>2.33&#x2013;82.60</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The concrete compressive strength dataset is publicly available on Kaggle (<ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/prathamtripathi/regression-with-neural-networking">https://www.kaggle.com/datasets/prathamtripathi/regression-with-neural-networking</ext-link>). The dataset consists of 1030 samples with no missing values across all features, which include Cement, Blast Furnace Slag, Fly Ash, Water, Superplasticizer, Coarse Aggregate, Fine Aggregate, Age, and Strength. After removing 25 duplicate entries, 1005 unique records remained for analysis. Summary statistics indicate that cement content ranges from 102 to 540 <inline-formula id="ieqn-3"><mml:math id="mml-ieqn-3"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> with a mean of 281.17 <inline-formula id="ieqn-4"><mml:math id="mml-ieqn-4"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>; water content varies between 121.8 and 247 <inline-formula id="ieqn-5"><mml:math id="mml-ieqn-5"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> with an average of 181.57 <inline-formula id="ieqn-6"><mml:math id="mml-ieqn-6"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>; and compressive strength spans from 2.33 to 82.6 MPa, averaging 35.82 MPa. These comprehensive distributions ensure a robust foundation for accurate strength prediction across diverse concrete mix designs.</p>
<p>The feature set encompasses both cementitious materials (cement, blast furnace slag, fly ash) and non-cementitious components (aggregates, water, chemical admixtures), providing comprehensive representation of modern concrete mix designs. The inclusion of supplementary cementitious materials (blast furnace slag and fly ash) reflects contemporary sustainable concrete practices, while the super-plasticizer feature represents modern chemical admixture technology for workability enhancement.</p>
<p><xref ref-type="fig" rid="fig-2">Fig. 2</xref> presents a correlation heatmap revealing relationships between concrete ingredients and concrete strength. The heatmap visually displays the correlation strengths using varying color intensities. Deeper colors signify stronger correlations, while lighter colors denote weaker associations. Cement demonstrates a positive correlation with concrete strength. Each ingredient perfectly correlates with itself, indicated by a perfect correlation along the diagonal. Overall, the ingredient correlations with strength range from 0.08 to 0.85. The heatmap is symmetric around the diagonal. Each ingredient and strength is labeled horizontally and vertically.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>Correlation heatmap of concrete ingredient relationships</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-2.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-3">Fig. 3</xref> shows the distribution of concrete strength, exhibiting a unimodal shape with the highest frequencies concentrated around the central region. The curve&#x2019;s peak reveals that the distribution reaches its highest frequency at approximately 35 MPa, indicating a concentration of concrete strength values around this point. Frequencies gradually decrease as the strength values deviate further from 35 MPa.</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>Distribution of concrete strength</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-3.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-4">Fig. 4</xref> illustrates the distribution of concrete ingredients, revealing key characteristics of the dataset. The distributions showcase varying patterns across different components. Notably, the age distribution displays a heavy skew, indicating a concentration of samples at younger ages. The distribution of blast furnace slag exhibits a similar skewed pattern. Fine aggregate distribution has a peak at the center of the range.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>Concrete ingredient distribution analysis</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-4.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-5">Fig. 5</xref> illustrates the inverse relationship between water-to-cement ratio and concrete strength. The water-to-cement ratio was calculated as <inline-formula id="ieqn-14"><mml:math id="mml-ieqn-14"><mml:mtext>Water-to-Cement-Ratio</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mtext>Water</mml:mtext><mml:mtext>Cement</mml:mtext></mml:mfrac></mml:math></inline-formula>, where Water and Cement represent the respective content by weight (<inline-formula id="ieqn-15"><mml:math id="mml-ieqn-15"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>) in the concrete mix. The graph depicts concrete strength, quantified in MPa, as it varies with this ratio. The displayed trend suggests a decline in strength as the ratio increases. A linear model, represented by the red line, fits the data, emphasizing this downward trend.</p>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>Effect of water-cement ratio on concrete compressive strength</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-5.tif"/>
</fig>
<p>The varying shades of green represent differences in point density: darker areas indicate regions where observations overlap more heavily, while lighter areas correspond to more sparsely distributed points.</p>
<p><xref ref-type="fig" rid="fig-6">Fig. 6</xref> illustrates the connection between cement content and concrete strength. The fitted regression line indicates a positive correlation, suggesting that as cement content increases, so does concrete strength. The fitted line, rendered with a dashed style, clearly demonstrates this positive association between cement content and strength.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>Effect of cement content on concrete compressive strength</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-6.tif"/>
</fig>
<p>The importance of features when predicting the concrete strength with a Random Forest model is presented in <xref ref-type="fig" rid="fig-7">Fig. 7</xref>. Age was identified as the most prominent with an importance value of 0.336, hence demonstrating that it is predominant in the model predictions. Second was Cement with a significant impact of 0.331, which emphasized the fact that cement is an important element in the generation of strength. An important role was also played by Water with a value of 0.107, and other variables such as Blast Furnace Slag (0.079) and Superplasticizer (0.065) played a less substantial role. On the other hand, Fine Aggregate (0.038), Coarse Aggregate (0.028), and Fly Ash (0.016) attributes did not significantly affect the model, and this explains the critical role played by Age and Cement in the determination of concrete strength in this model.</p>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>Random forest feature importance for concrete strength prediction</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-7.tif"/>
</fig>
<p>The feature importance given by the Random Forest model is a numerical value that is utilized to establish the relevance of each variable to the overall predictive success. In this instance, the importance scores indicate the effect of each feature on concrete strength prediction as compared to the other features. These scores help in identifying the most impactful parameters, which enhances understanding and thereby helps develop more effective mix design strategies. The bar graph in the figure visually conveys the dominating influence of both Age and Cement in determining the concrete strength in this study, which is an important finding.</p>
<p>A key limitation is that standard feature-importance scores reflect only the individual input variables and do not automatically capture derived ratios (e.g., W/C ratio influence) or interactions; therefore, combined effects may be learned implicitly but not appear as a ratio.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Somersaulting Spider Optimizer (SSO) Algorithm</title>
<p>The Somersaulting Spider Optimizer (SSO) is a bio-inspired metaheuristic algorithm based on the extraordinary locomotion behavior of <italic>Cebrennus rechenbergi</italic>, commonly known as the somersaulting spider or flic-flac spider. This desert-dwelling arachnid exhibits two distinct movement patterns: high-energy somersaulting for rapid escape at velocities up to 2 m per second, and low-energy rolling for local foraging and territory exploration [<xref ref-type="bibr" rid="ref-28">28</xref>].</p>
<p>The SSO algorithm models spider behavior through a population of <italic>N</italic> spider agents, each representing a candidate solution <inline-formula id="ieqn-16"><mml:math id="mml-ieqn-16"><mml:msub><mml:mrow><mml:mtext mathvariant="bold">x</mml:mtext></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> in a <italic>D</italic>-dimensional search space. Each spider maintains an energy level <inline-formula id="ieqn-17"><mml:math id="mml-ieqn-17"><mml:msubsup><mml:mi>E</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:msubsup><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> and a stagnation counter <inline-formula id="ieqn-18"><mml:math id="mml-ieqn-18"><mml:msubsup><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:msubsup></mml:math></inline-formula> that influences behavioral decisions.</p>
<p>The balance between exploration and exploitation phases is controlled by time-dependent adaptive factors:
<disp-formula id="eqn-1"><label>(1)</label><mml:math id="mml-eqn-1" display="block"><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mrow><mml:mtext>exp</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mi>t</mml:mi><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:mrow></mml:msub></mml:mfrac><mml:mo>,</mml:mo><mml:mspace width="1em" /><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mrow><mml:mtext>expl</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mi>t</mml:mi><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:mrow></mml:msub></mml:mfrac></mml:math></disp-formula>where <inline-formula id="ieqn-19"><mml:math id="mml-ieqn-19"><mml:mi>t</mml:mi></mml:math></inline-formula> represents the current iteration and <inline-formula id="ieqn-20"><mml:math id="mml-ieqn-20"><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:mrow></mml:msub></mml:math></inline-formula> denotes the maximum iterations. This complementary relationship ensures seamless transition from exploration-dominated to exploitation-dominated search phases.</p>
<p>Each spider agent maintains a dynamic energy level that reflects search performance:
<disp-formula id="eqn-2"><label>(2)</label><mml:math id="mml-eqn-2" display="block"><mml:msubsup><mml:mi>E</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:mo movablelimits="true" form="prefix">min</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1.0</mml:mn><mml:mo>,</mml:mo><mml:msubsup><mml:mi>E</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:mn>0.1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mtd><mml:mtd><mml:mrow><mml:mtext>if&#xA0;</mml:mtext></mml:mrow><mml:mi>f</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mrow><mml:mtext mathvariant="bold">x</mml:mtext></mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x003C;</mml:mo><mml:mi>f</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mrow><mml:mtext mathvariant="bold">x</mml:mtext></mml:mrow><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0.95</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:msubsup><mml:mi>E</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:msubsup></mml:mtd><mml:mtd><mml:mrow><mml:mtext>otherwise</mml:mtext></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>The behavioral threshold determining movement type is calculated as:
<disp-formula id="eqn-3"><label>(3)</label><mml:math id="mml-eqn-3" display="block"><mml:msubsup><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mrow><mml:mtext>behav</mml:mtext></mml:mrow></mml:mrow><mml:mi>i</mml:mi></mml:msubsup><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mrow><mml:mtext>exp</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x00D7;</mml:mo><mml:msubsup><mml:mi>E</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:msubsup></mml:math></disp-formula></p>
<p>When <inline-formula id="ieqn-21"><mml:math id="mml-ieqn-21"><mml:mtext>rand</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x003C;</mml:mo><mml:msubsup><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mtext>behav</mml:mtext></mml:mrow><mml:mi>i</mml:mi></mml:msubsup><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, the spider performs somersaulting movement; otherwise, rolling movement is applied.</p>
<p>The exploration phase employs somersaulting movement combining rotational and translational components. The target selection adapts based on stagnation status, with stagnant spiders (<inline-formula id="ieqn-22"><mml:math id="mml-ieqn-22"><mml:msubsup><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi></mml:msubsup><mml:mo>&#x003E;</mml:mo><mml:mn>5</mml:mn></mml:math></inline-formula>) targeting random positions and active spiders targeting the global best solution. The somersaulting movement is formulated as:
<disp-formula id="eqn-4"><label>(4)</label><mml:math id="mml-eqn-4" display="block"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mtext>new</mml:mtext></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03D5;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>u</mml:mi><mml:msub><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>l</mml:mi><mml:msub><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x00D7;</mml:mo><mml:mn>0.1</mml:mn><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mrow><mml:mtext>target</mml:mtext></mml:mrow><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:math></disp-formula>where the rotational and translational components are:
<disp-formula id="eqn-5"><label>(5)</label><mml:math id="mml-eqn-5" display="block"><mml:msub><mml:mi>&#x03D5;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>I</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:mi>sin</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:mi>&#x03C0;</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mspace width="1em" /><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>0.5</mml:mn><mml:mo>+</mml:mo><mml:mn>0.5</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:math></disp-formula></p>
<p>The intensity factor <inline-formula id="ieqn-23"><mml:math id="mml-ieqn-23"><mml:msub><mml:mi>I</mml:mi><mml:mi>s</mml:mi></mml:msub></mml:math></inline-formula> adjusts based on stagnation: <inline-formula id="ieqn-24"><mml:math id="mml-ieqn-24"><mml:msub><mml:mi>I</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>0.8</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mtext>exp</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for stagnant spiders, <inline-formula id="ieqn-25"><mml:math id="mml-ieqn-25"><mml:msub><mml:mi>I</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>=</mml:mo></mml:math></inline-formula><inline-formula id="ieqn-26"><mml:math id="mml-ieqn-26"><mml:mn>0.6</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mtext>exp</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> otherwise.</p>
<p>The exploitation phase uses rolling movement for local refinement:
<disp-formula id="eqn-6"><label>(6)</label><mml:math id="mml-eqn-6" display="block"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mtext>new</mml:mtext></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:mi>cos</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:mi>&#x03C0;</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mn>3</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></disp-formula>where the rolling radius decreases with iteration progress:
<disp-formula id="eqn-7"><label>(7)</label><mml:math id="mml-eqn-7" display="block"><mml:msub><mml:mi>R</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mrow><mml:mtext>expl</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x00D7;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>u</mml:mi><mml:msub><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>l</mml:mi><mml:msub><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x00D7;</mml:mo><mml:mn>0.05</mml:mn></mml:math></disp-formula></p>
<p>When spider energy falls below 0.2, conservative adjustment is applied:
<disp-formula id="eqn-8"><label>(8)</label><mml:math id="mml-eqn-8" display="block"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mtext>adjusted</mml:mtext></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mn>0.9</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:mn>0.1</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mrow><mml:mtext>best</mml:mtext></mml:mrow><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula></p>
<p>The complete Somersaulting Spider Optimizer algorithm integrates both exploration and exploitation mechanisms through an adaptive energy management system. Algorithm 1 presents the main framework that coordinates the somersaulting and rolling behaviors while maintaining population diversity and convergence toward optimal solutions.</p>
<fig id="fig-22">
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-22.tif"/>
</fig>
<p>The SSO algorithm&#x2019;s adaptive energy management system and bio-inspired movement mechanisms provide effective balance between exploration and exploitation, making it particularly suitable for hyperparameter optimization in machine learning models such as CatBoost for concrete strength prediction applications.</p>
<p>The complete SSO implementation is publicly available on GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/SayedKenawy/SomersaultingSpiderOptimizer">https://github.com/SayedKenawy/SomersaultingSpiderOptimizer</ext-link>) to ensure reproducibility and facilitate further research.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>CatBoost</title>
<p>CatBoost (Categorical Boosting) is a state-of-the-art gradient boosting algorithm that addresses critical limitations in traditional boosting implementations through two fundamental innovations: ordered boosting and advanced categorical feature processing. The algorithm effectively mitigates prediction shift caused by target leakage, making it particularly suitable for concrete strength prediction tasks involving the eight-dimensional feature space of concrete mix design parameters [<xref ref-type="bibr" rid="ref-6">6</xref>].</p>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Mathematical Framework and Dataset Integration</title>
<p>CatBoost constructs an ensemble predictor by iteratively combining weak learners following the standard gradient boosting framework [<xref ref-type="bibr" rid="ref-29">29</xref>]. Given the concrete dataset <inline-formula id="ieqn-44"><mml:math id="mml-ieqn-44"><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:msubsup><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:msubsup></mml:math></inline-formula>, where <inline-formula id="ieqn-45"><mml:math id="mml-ieqn-45"><mml:msub><mml:mi>x</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn>8</mml:mn></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> represents the eight-dimensional feature vector containing cement content, blast furnace slag, fly ash, water content, super-plasticizer, coarse aggregate, fine aggregate, and age (days), and <inline-formula id="ieqn-46"><mml:math id="mml-ieqn-46"><mml:msub><mml:mi>y</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula> denotes the concrete compressive strength, the algorithm builds approximations <inline-formula id="ieqn-47"><mml:math id="mml-ieqn-47"><mml:msup><mml:mi>F</mml:mi><mml:mi>t</mml:mi></mml:msup><mml:mo>:</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mn>8</mml:mn></mml:msup><mml:mo stretchy="false">&#x2192;</mml:mo><mml:mrow><mml:mi 
mathvariant="double-struck">R</mml:mi></mml:mrow></mml:math></inline-formula> additively:
<disp-formula id="eqn-9"><label>(9)</label><mml:math id="mml-eqn-9" display="block"><mml:msup><mml:mi>F</mml:mi><mml:mi>t</mml:mi></mml:msup><mml:mo>=</mml:mo><mml:msup><mml:mi>F</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:math></disp-formula>where <inline-formula id="ieqn-48"><mml:math id="mml-ieqn-48"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula> is the learning rate and <inline-formula id="ieqn-49"><mml:math id="mml-ieqn-49"><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:math></inline-formula> represents the base predictor selected to minimize expected loss:
<disp-formula id="eqn-10"><label>(10)</label><mml:math id="mml-eqn-10" display="block"><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>arg</mml:mi><mml:mo>&#x2061;</mml:mo><mml:munder><mml:mo movablelimits="true" form="prefix">min</mml:mo><mml:mrow><mml:mi>h</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mi>&#x0210B;</mml:mi></mml:mrow></mml:mrow></mml:munder><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mi>L</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>F</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>h</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo></mml:math></disp-formula></p>
<p>The algorithm employs oblivious decision trees as base predictors, utilizing identical splitting criteria across entire tree levels [<xref ref-type="bibr" rid="ref-30">30</xref>]. This symmetric structure is particularly advantageous for the concrete dataset where numerical features (cement: 102&#x2013;540 <inline-formula id="ieqn-50"><mml:math id="mml-ieqn-50"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, blast furnace slag: 0&#x2013;359.4 <inline-formula id="ieqn-51"><mml:math id="mml-ieqn-51"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, fly ash: 0&#x2013;200.1 <inline-formula id="ieqn-52"><mml:math id="mml-ieqn-52"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, water: 121.8&#x2013;247 <inline-formula id="ieqn-53"><mml:math id="mml-ieqn-53"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, super-plasticizer: 0&#x2013;32.2 <inline-formula id="ieqn-54"><mml:math id="mml-ieqn-54"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, coarse aggregate: 801&#x2013;1145 <inline-formula id="ieqn-55"><mml:math id="mml-ieqn-55"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, fine aggregate: 594&#x2013;992.6 <inline-formula id="ieqn-56"><mml:math id="mml-ieqn-56"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, age: 1&#x2013;365 days) exhibit varying scales and distributions.</p>
<p>The ordered boosting mechanism eliminates target leakage by ensuring predictions for each training example depend solely on preceding examples in a random permutation <inline-formula id="ieqn-57"><mml:math id="mml-ieqn-57"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> [<xref ref-type="bibr" rid="ref-2">2</xref>]. For concrete strength prediction, this is crucial as it prevents the model from using future strength information when predicting current samples:<disp-formula id="eqn-11"><label>(11)</label><mml:math id="mml-eqn-11" display="block"><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>&#x03C3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></disp-formula>where <inline-formula id="ieqn-58"><mml:math id="mml-ieqn-58"><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> represents the residual for the <inline-formula id="ieqn-59"><mml:math id="mml-ieqn-59"><mml:mi>i</mml:mi></mml:math></inline-formula>-th concrete sample, ensuring that the model <inline-formula id="ieqn-60"><mml:math id="mml-ieqn-60"><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>&#x03C3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> uses only the first <inline-formula id="ieqn-61"><mml:math id="mml-ieqn-61"><mml:mi>&#x03C3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> samples for prediction, 
maintaining temporal and distributional consistency.</p>
<p>The algorithm&#x2019;s gradient computation considers the complex interactions between cementitious materials (cement, blast furnace slag, fly ash), water-to-binder ratios, chemical admixtures (super-plasticizer), aggregate compositions (coarse and fine aggregates), and curing time (age) that collectively determine concrete compressive strength through non-linear relationships.</p>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Implementation and Hyperparameter Optimization Framework</title>
<p>CatBoost operates efficiently with the concrete dataset&#x2019;s purely numerical features, eliminating preprocessing overhead while maintaining robust performance across the feature space. The algorithm&#x2019;s automatic scaling and normalization capabilities handle the diverse measurement units inherent in concrete mix proportions (<inline-formula id="ieqn-62"><mml:math id="mml-ieqn-62"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> for materials, days for age).</p>
<p>The tree construction process employs a greedy top-down procedure, selecting optimal splits across the eight-dimensional feature space. For concrete applications, this enables the algorithm to identify critical thresholds such as optimal cement content ranges (typically 300&#x2013;450 <inline-formula id="ieqn-63"><mml:math id="mml-ieqn-63"><mml:msup><mml:mi>kg/m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>), effective water-cement ratios (0.3&#x2013;0.6), and beneficial supplementary cementitious material proportions that maximize compressive strength.</p>
<p>CatBoost&#x2019;s hyperparameter space for concrete strength prediction includes: learning rate <inline-formula id="ieqn-64"><mml:math id="mml-ieqn-64"><mml:mi>&#x03B1;</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0.01</mml:mn><mml:mo>,</mml:mo><mml:mn>0.3</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, tree depth <inline-formula id="ieqn-65"><mml:math id="mml-ieqn-65"><mml:mi>d</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>4</mml:mn><mml:mo>,</mml:mo><mml:mn>10</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, L2 regularization <inline-formula id="ieqn-66"><mml:math id="mml-ieqn-66"><mml:mi>&#x03BB;</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>10</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, number of iterations <inline-formula id="ieqn-67"><mml:math id="mml-ieqn-67"><mml:msub><mml:mi>n</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>100</mml:mn><mml:mo>,</mml:mo><mml:mn>2000</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, and minimum samples per leaf <inline-formula id="ieqn-68"><mml:math id="mml-ieqn-68"><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:msub><mml:mi>n</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mi>a</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi><mml:mi>l</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>20</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>. These parameters significantly influence the model&#x2019;s capacity to capture the complex relationships between mix design variables and resulting compressive strength.</p>
<p>The algorithm excels at modeling the time-dependent strength development captured by the age feature, automatically learning strength gain patterns from early-age (1&#x2013;7 days) to long-term (28&#x2013;365 days) curing periods. CatBoost&#x2019;s gradient-based optimization effectively handles the non-linear strength evolution influenced by cement hydration, pozzolanic reactions from supplementary materials (blast furnace slag, fly ash), and the role of chemical admixtures in strength development.</p>
<p>The integration of CatBoost with the Somersaulting Spider Optimizer enables systematic exploration of the hyperparameter space to maximize prediction accuracy across the diverse range of concrete mix designs represented in the dataset. This optimization approach is particularly valuable given the dataset&#x2019;s comprehensive coverage of conventional and high-performance concrete formulations, where optimal model configurations may vary significantly based on the proportion of supplementary cementitious materials and age-related strength development patterns. The algorithm&#x2019;s computational efficiency makes it ideal for iterative hyperparameter tuning, facilitating the development of highly accurate concrete strength prediction models that can reliably estimate compressive strength based on the eight fundamental mix design parameters.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental Results</title>
<p>This section presents a comprehensive evaluation of machine learning models and optimization algorithms for concrete strength prediction through three analytical phases. First, baseline machine learning models are assessed using standard performance metrics to establish fundamental predictive capabilities. Second, metaheuristic optimization algorithms enhance the best-performing baseline model, demonstrating bio-inspired optimization effectiveness. Finally, statistical analysis validates performance differences through parametric and non-parametric testing methods. Standardized hyperparameter configurations were implemented for all optimization algorithms to ensure fair comparison, as presented in <xref ref-type="table" rid="table-3">Table 3</xref>.</p>
<table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>Hyperparameter settings for metaheuristic optimization algorithms</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Alg.</th>
<th>Parameter</th>
<th>Value</th>
<th>Alg.</th>
<th>Parameter</th>
<th>Value</th>
</tr>
</thead>
<tbody>
<tr>
<td>SSO</td>
<td>Exploration factor</td>
<td><inline-formula id="ieqn-69"><mml:math id="mml-ieqn-69"><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>t</mml:mi><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:math></inline-formula></td>
<td>DE</td>
<td>Crossover probability</td>
<td>0.5</td>
</tr>
<tr>
<td></td>
<td>Exploitation factor</td>
<td><inline-formula id="ieqn-70"><mml:math id="mml-ieqn-70"><mml:mi>t</mml:mi><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:math></inline-formula></td>
<td></td>
<td>Mutation factor</td>
<td>0.5</td>
</tr>
<tr>
<td></td>
<td>Energy increase rate</td>
<td>0.1</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td></td>
<td>Energy decrease rate</td>
<td>0.95</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>GWO</td>
<td>Parameter <inline-formula id="ieqn-71"><mml:math id="mml-ieqn-71"><mml:mi>a</mml:mi></mml:math></inline-formula></td>
<td>2 to 0 (linear)</td>
<td>WOA</td>
<td>Parameter <inline-formula id="ieqn-72"><mml:math id="mml-ieqn-72"><mml:mi>a</mml:mi></mml:math></inline-formula></td>
<td>2 to 0 (linear)</td>
</tr>
<tr>
<td></td>
<td><inline-formula id="ieqn-73"><mml:math id="mml-ieqn-73"><mml:msub><mml:mi>r</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:math></inline-formula></td>
<td>[0, 1]</td>
<td></td>
<td><inline-formula id="ieqn-74"><mml:math id="mml-ieqn-74"><mml:mi>b</mml:mi></mml:math></inline-formula> constant</td>
<td>1</td>
</tr>
<tr>
<td></td>
<td></td>
<td></td>
<td></td>
<td>Probability <inline-formula id="ieqn-75"><mml:math id="mml-ieqn-75"><mml:mi>p</mml:mi></mml:math></inline-formula></td>
<td>0.5</td>
</tr>
<tr>
<td>GA</td>
<td>Crossover probability</td>
<td>0.7&#x2013;0.9</td>
<td>BA</td>
<td>Frequency range</td>
<td>[0, 100]</td>
</tr>
<tr>
<td></td>
<td>Mutation probability</td>
<td>0.01&#x2013;0.1</td>
<td></td>
<td>Loudness <inline-formula id="ieqn-76"><mml:math id="mml-ieqn-76"><mml:msub><mml:mi>A</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:math></inline-formula></td>
<td>1</td>
</tr>
<tr>
<td></td>
<td>Elite count</td>
<td>2</td>
<td></td>
<td>Pulse rate</td>
<td>[0, 1]</td>
</tr>
<tr>
<td></td>
<td>Selection</td>
<td>Tournament</td>
<td></td>
<td><inline-formula id="ieqn-77"><mml:math id="mml-ieqn-77"><mml:mi>a</mml:mi><mml:mo>=</mml:mo><mml:mi>c</mml:mi></mml:math></inline-formula></td>
<td>0.9</td>
</tr>
<tr>
<td>HHO</td>
<td>Exploration prob.</td>
<td>0.5</td>
<td>PSO</td>
<td><inline-formula id="ieqn-78"><mml:math id="mml-ieqn-78"><mml:msub><mml:mi>C</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:math></inline-formula></td>
<td>2.0</td>
</tr>
<tr>
<td></td>
<td>Exploitation prob.</td>
<td>0.25 each</td>
<td></td>
<td>Inertia <italic>W</italic></td>
<td>0.9 to 0.4</td>
</tr>
<tr>
<td></td>
<td>Hunting coeff. <inline-formula id="ieqn-79"><mml:math id="mml-ieqn-79"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula></td>
<td>2 to 0 (linear)</td>
<td></td>
<td></td>
<td></td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s4_1">
<label>4.1</label>
<title>Machine Learning Results</title>
<p><xref ref-type="table" rid="table-4">Table 4</xref> presents a comprehensive comparison of regression models for concrete strength prediction. CatBoost demonstrated superior performance across all evaluation metrics, achieving the lowest Mean Squared Error (MSE) of 16.73 and the highest Pearson correlation coefficient (r) of 0.968, indicating strong predictive accuracy and linear relationship with observed values. The model also attained an <inline-formula id="ieqn-80"><mml:math id="mml-ieqn-80"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> value of 0.935, explaining 93.5% of the variance in concrete strength predictions. XGBoost ranked second with an MSE of 21.22, while ExtraTrees and RandomForest showed progressively higher error rates with MSE values of 26.99 and 29.57, respectively.</p>
<table-wrap id="table-4">
<label>Table 4</label>
<caption>
<title>Baseline machine learning model performance comparison for concrete strength prediction</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Models</th>
<th>MSE</th>
<th>RMSE</th>
<th>MAE</th>
<th>Pearson (r)</th>
<th><inline-formula id="ieqn-81"><mml:math id="mml-ieqn-81"><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:math></inline-formula></th>
<th>NSE</th>
<th>Willmott index</th>
<th>Fitted time (s)</th>
</tr>
</thead>
<tbody>
<tr>
<td>CatBoost</td>
<td>16.7264</td>
<td>4.0898</td>
<td>2.7181</td>
<td>0.968</td>
<td>0.9351</td>
<td>0.9351</td>
<td>0.892</td>
<td>0.002</td>
</tr>
<tr>
<td>XGBoost</td>
<td>21.218</td>
<td>4.6063</td>
<td>2.9964</td>
<td>0.959</td>
<td>0.9177</td>
<td>0.9177</td>
<td>0.8813</td>
<td>0.001</td>
</tr>
<tr>
<td>ExtraTrees</td>
<td>26.9945</td>
<td>5.1956</td>
<td>3.3263</td>
<td>0.9475</td>
<td>0.8952</td>
<td>0.8952</td>
<td>0.8668</td>
<td>0.017</td>
</tr>
<tr>
<td>RandomForest</td>
<td>29.5652</td>
<td>5.4374</td>
<td>3.7214</td>
<td>0.9436</td>
<td>0.8853</td>
<td>0.8853</td>
<td>0.8478</td>
<td>0.015</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The error analysis in <xref ref-type="fig" rid="fig-8">Fig. 8</xref> reinforces CatBoost&#x2019;s superiority, showing the lowest RMSE and MAE values among all compared models. CatBoost achieved an RMSE of 4.09 and MAE of 2.72, substantially outperforming other algorithms and establishing it as the most reliable model for concrete strength prediction.</p>
<fig id="fig-8">
<label>Figure 8</label>
<caption>
<title>RMSE and MAE comparison across baseline machine learning models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-8.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-9">Fig. 9</xref> presents cumulative distribution functions (CDFs) for error metrics, revealing the concentration of prediction errors. The steep rise in all CDF curves indicates that the majority of predictions maintain low error values, with the MAE CDF reaching unity at relatively small error magnitudes, demonstrating consistent predictive accuracy.</p>
<fig id="fig-9">
<label>Figure 9</label>
<caption>
<title>Cumulative distribution functions of error metrics for baseline models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-9.tif"/>
</fig>
<p>The performance density distribution shown in <xref ref-type="fig" rid="fig-10">Fig. 10</xref> demonstrates that the baseline machine learning algorithms produce highly stable and consistent results. The sharp peak and narrow spread around the central value imply minimal variance in prediction quality, suggesting that the ensemble models (CatBoost, XGBoost, ExtraTrees, and RandomForest) are not only accurate but also dependable across repeated evaluations. The significant overlap among the distributions further highlights that these algorithms possess similar generalization capabilities, reinforcing their suitability for robust predictive modeling in the context of concrete strength estimation.</p>
<fig id="fig-10">
<label>Figure 10</label>
<caption>
<title>Performance density distribution of baseline machine learning algorithms</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-10.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-11">Fig. 11</xref> provides a visual performance comparison through color-coded intensities, where lighter shades represent superior performance. CatBoost exhibits the lightest coloration across multiple metrics, particularly for MSE, confirming its optimal performance characteristics combined with efficient computational time.</p>
<fig id="fig-11">
<label>Figure 11</label>
<caption>
<title>Performance heatmap of baseline machine learning models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-11.tif"/>
</fig>
<p>As illustrated in <xref ref-type="fig" rid="fig-12">Fig. 12</xref>, CatBoost consistently outperforms other baseline machine learning models across a diverse set of evaluation criteria. The model achieves near-optimal scores in <inline-formula id="ieqn-82"><mml:math id="mml-ieqn-82"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, Pearson&#x2019;s <inline-formula id="ieqn-83"><mml:math id="mml-ieqn-83"><mml:mi>r</mml:mi></mml:math></inline-formula>, NSE, and the Willmott Index, indicating strong linear correlation and excellent overall predictive skill. Simultaneously, CatBoost maintains low values for error-based metrics (MAE, RMSE, MSE), which confirms its ability to minimize prediction deviations.</p>
<fig id="fig-12">
<label>Figure 12</label>
<caption>
<title>Multi-dimensional performance radar plot of baseline machine learning models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-12.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-13">Fig. 13</xref> illustrates the distribution patterns of key performance metrics across all models. The <inline-formula id="ieqn-84"><mml:math id="mml-ieqn-84"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> distribution shows high concentration around 0.9, confirming strong model performance. The Nash-Sutcliffe Efficiency (NSE) distribution peaks at 0.8, while the Willmott Index centers around 0.9, both indicating robust predictive capabilities across the evaluated models.</p>
<fig id="fig-13">
<label>Figure 13</label>
<caption>
<title>Distribution of performance metrics for baseline machine learning models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-13.tif"/>
</fig>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Optimization Results</title>
<p><xref ref-type="table" rid="table-5">Table 5</xref> demonstrates the effectiveness of metaheuristic optimization algorithms in enhancing CatBoost performance. The Somersaulting Spider Optimizer (SSO) achieved the best results when coupled with CatBoost, producing an MSE of 12.61, representing a 24.6% improvement over the baseline CatBoost model. SSO-CatBoost also attained the highest Pearson correlation (0.976) and <inline-formula id="ieqn-85"><mml:math id="mml-ieqn-85"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> value (0.951), while maintaining efficient computational time (0.001 s). Differential Evolution (DE) and Grey Wolf Optimizer (GWO) ranked second and third respectively, with MSE values of 13.19 and 13.52.</p>
<table-wrap id="table-5">
<label>Table 5</label>
<caption>
<title>SSO and other metaheuristic optimization algorithms performance with CatBoost</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Optimized models</th>
<th>MSE</th>
<th>RMSE</th>
<th>MAE</th>
<th>Pearson (r)</th>
<th><inline-formula id="ieqn-86"><mml:math id="mml-ieqn-86"><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:math></inline-formula></th>
<th>NSE</th>
<th>Willmott index</th>
<th>Fitted time (s)</th>
</tr>
</thead>
<tbody>
<tr>
<td>SSO-CatBoost</td>
<td>12.6059</td>
<td>3.5505</td>
<td>2.3173</td>
<td>0.9759</td>
<td>0.9511</td>
<td>0.9511</td>
<td>0.909</td>
<td>0.001</td>
</tr>
<tr>
<td>DE-CatBoost</td>
<td>13.1916</td>
<td>3.632</td>
<td>2.3876</td>
<td>0.9745</td>
<td>0.9488</td>
<td>0.9488</td>
<td>0.9058</td>
<td>0.001</td>
</tr>
<tr>
<td>GWO-CatBoost</td>
<td>13.5183</td>
<td>3.6767</td>
<td>2.4087</td>
<td>0.9738</td>
<td>0.9475</td>
<td>0.9475</td>
<td>0.9051</td>
<td>0.001</td>
</tr>
<tr>
<td>WOA-CatBoost</td>
<td>13.6253</td>
<td>3.6912</td>
<td>2.3734</td>
<td>0.9737</td>
<td>0.9471</td>
<td>0.9471</td>
<td>0.9071</td>
<td>0.001</td>
</tr>
<tr>
<td>GA-CatBoost</td>
<td>14.0498</td>
<td>3.7483</td>
<td>2.3948</td>
<td>0.9728</td>
<td>0.9455</td>
<td>0.9455</td>
<td>0.9057</td>
<td>0.001</td>
</tr>
<tr>
<td>BAT-CatBoost</td>
<td>14.4251</td>
<td>3.798</td>
<td>2.5107</td>
<td>0.9725</td>
<td>0.944</td>
<td>0.944</td>
<td>0.9006</td>
<td>0.002</td>
</tr>
<tr>
<td>HHO-CatBoost</td>
<td>14.4382</td>
<td>3.7998</td>
<td>2.4799</td>
<td>0.9724</td>
<td>0.944</td>
<td>0.944</td>
<td>0.9019</td>
<td>0.002</td>
</tr>
<tr>
<td>PSO-CatBoost</td>
<td>15.4593</td>
<td>3.9318</td>
<td>2.4905</td>
<td>0.9704</td>
<td>0.94</td>
<td>0.94</td>
<td>0.9014</td>
<td>0.003</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The performance comparison in <xref ref-type="fig" rid="fig-14">Fig. 14</xref> clearly establishes SSO-CatBoost as the superior approach, achieving the highest Pearson correlation coefficient (0.976) and <inline-formula id="ieqn-87"><mml:math id="mml-ieqn-87"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> value (0.951) among all optimization algorithms. This demonstrates SSO&#x2019;s capability to identify optimal hyperparameter configurations that maximize model predictive accuracy.</p>
<fig id="fig-14">
<label>Figure 14</label>
<caption>
<title>Pearson correlation and <inline-formula id="ieqn-88"><mml:math id="mml-ieqn-88"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> performance comparison of SSO vs. other optimization algorithms</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-14.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-15">Fig. 15</xref> presents the error distribution characteristics for optimized models. The rapid rise of CDF curves indicates concentrated low-error predictions, with MSE, RMSE, and MAE reaching cumulative probabilities of unity at relatively small values, confirming the optimization algorithms&#x2019; effectiveness in reducing prediction errors.</p>
<fig id="fig-15">
<label>Figure 15</label>
<caption>
<title>Cumulative distribution functions of error metrics for optimized models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-15.tif"/>
</fig>
<p>The density distribution analysis in <xref ref-type="fig" rid="fig-16">Fig. 16</xref> reveals SSO-CatBoost&#x2019;s performance concentration, with peak density occurring around 12 and a distribution span extending to 30. This pattern indicates consistent performance clustering around optimal values while maintaining exploration capabilities across the performance landscape.</p>
<fig id="fig-16">
<label>Figure 16</label>
<caption>
<title>SSO-CatBoost performance density distribution analysis</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-16.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-17">Fig. 17</xref> visualizes the comparative advantage of SSO-CatBoost through color intensity mapping. SSO-CatBoost exhibits the lightest coloration for error metrics (MSE, RMSE, MAE) while maintaining optimal performance in correlation and efficiency metrics, clearly distinguishing it from other optimization approaches.</p>
<fig id="fig-17">
<label>Figure 17</label>
<caption>
<title>Performance heatmap comparison of SSO-CatBoost and other optimized models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-17.tif"/>
</fig>
<p>The radar plot comparison in <xref ref-type="fig" rid="fig-18">Fig. 18</xref> provides a multi-dimensional performance perspective across all optimization algorithms. Each algorithm demonstrates distinct strengths and trade-offs, with SSO-CatBoost achieving the most balanced profile across all evaluation criteria, effectively combining low error rates with high correlation metrics and computational efficiency.</p>
<fig id="fig-18">
<label>Figure 18</label>
<caption>
<title>Multi-dimensional radar plot analysis of optimization algorithms with CatBoost</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-18.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-19">Fig. 19</xref> shows the enhanced performance distributions achieved through optimization. The metrics exhibit distributions concentrated toward higher values with tails extending toward lower scores, with <inline-formula id="ieqn-89"><mml:math id="mml-ieqn-89"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> concentrating near 0.95, NSE approaching 0.95, and Willmott Index clustering around 0.90. The fitted time distribution remains concentrated at low values, indicating maintained computational efficiency across all optimized models.</p>
<fig id="fig-19">
<label>Figure 19</label>
<caption>
<title>Performance metrics distribution for SSO and other optimized CatBoost models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-19a.tif"/>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-19b.tif"/>
</fig>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Statistical Analysis</title>
<p><xref ref-type="table" rid="table-6">Table 6</xref> presents the Analysis of Variance (ANOVA) results, examining the statistical significance of performance differences between models. The between-groups analysis yielded an F-statistic of 7.17 with degrees of freedom (7, 439) and a <italic>p</italic>-value &#x003C; 0.0001, indicating statistically significant differences among the compared algorithms. The sum of squares decomposition shows a between-groups sum of squares of 0.299 compared to a within-groups sum of squares of 2.617, confirming meaningful performance distinctions between optimization approaches.</p>
<table-wrap id="table-6">
<label>Table 6</label>
<caption>
<title>ANOVA statistical significance analysis for SSO and other optimization algorithms</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/> 
</colgroup>
<thead>
<tr>
<th>Source</th>
<th>SS</th>
<th>DF</th>
<th>MS</th>
<th>F</th>
<th><italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td>Between groups</td>
<td>0.2993</td>
<td>7</td>
<td>0.0428</td>
<td>7.1711 (F(7, 439))</td>
<td>&#x003C;0.0001</td>
</tr>
<tr>
<td>Within groups</td>
<td>2.6172</td>
<td>439</td>
<td>0.006</td>
<td></td>
<td></td>
</tr>
<tr>
<td>Total</td>
<td>2.9165</td>
<td>446</td>
<td></td>
<td></td>
<td></td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="fig-20">Fig. 20</xref> presents the ANOVA analysis on a logarithmic scale, highlighting the contribution of different variance sources. The total sum of squares reaches <inline-formula id="ieqn-90"><mml:math id="mml-ieqn-90"><mml:msup><mml:mi>10</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, with clear differentiation between within-group and between-group variations, providing visual confirmation of the statistical significance observed in the ANOVA table.</p>
<fig id="fig-20">
<label>Figure 20</label>
<caption>
<title>ANOVA variance decomposition analysis on logarithmic scale</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-20.tif"/>
</fig>
<p><xref ref-type="table" rid="table-7">Table 7</xref> summarizes the Wilcoxon signed-rank test results, confirming the statistical significance of each optimization algorithm&#x2019;s performance. SSO-CatBoost achieved the highest actual median (0.945) with a median difference of 0.945 from the theoretical median of zero. All algorithms demonstrated statistical significance (<italic>p</italic> &#x003C; 0.001) at <inline-formula id="ieqn-91"><mml:math id="mml-ieqn-91"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula> &#x003D; 0.05, with SSO-CatBoost showing the most favorable performance characteristics among the tested approaches. The notation (&#x002A;&#x002A;&#x002A;) signifies statistical significance at <inline-formula id="ieqn-92"><mml:math id="mml-ieqn-92"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.001</mml:mn></mml:math></inline-formula>.</p>
<table-wrap id="table-7">
<label>Table 7</label>
<caption>
<title>Wilcoxon signed-rank test results for metaheuristic-optimized CatBoost models</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Actual median</th>
<th>Sample size</th>
<th>Sum of positive ranks</th>
<th><italic>p</italic>-value (Two-tailed)</th>
<th>Statistically significant (<inline-formula id="ieqn-93"><mml:math id="mml-ieqn-93"><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.05</mml:mn></mml:math></inline-formula>)</th>
</tr>
</thead>
<tbody>
<tr>
<td>SSO-CatBoost</td>
<td>0.9448</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
<tr>
<td>GA-CatBoost</td>
<td>0.9423</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
<tr>
<td>BAT-CatBoost</td>
<td>0.9382</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
<tr>
<td>GWO-CatBoost</td>
<td>0.9367</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
<tr>
<td>DE-CatBoost</td>
<td>0.9331</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
<tr>
<td>PSO-CatBoost</td>
<td>0.9323</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
<tr>
<td>WOA-CatBoost</td>
<td>0.9354</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
<tr>
<td>HHO-CatBoost</td>
<td>0.9330</td>
<td>50</td>
<td>1275</td>
<td>1.78E-15</td>
<td>Yes (&#x002A;&#x002A;&#x002A;)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="fig-21">Fig. 21</xref> illustrates the <inline-formula id="ieqn-94"><mml:math id="mml-ieqn-94"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> score distributions for all optimization algorithms through box plots. SSO-CatBoost demonstrates the highest median <inline-formula id="ieqn-95"><mml:math id="mml-ieqn-95"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> score (0.95) with the smallest interquartile range, indicating both superior performance and consistency. The box plot reveals SSO-CatBoost&#x2019;s distribution is shifted toward higher <inline-formula id="ieqn-96"><mml:math id="mml-ieqn-96"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> values compared to other algorithms, confirming its enhanced capability in explaining variance within the concrete strength dataset.</p>
<fig id="fig-21">
<label>Figure 21</label>
<caption>
<title><inline-formula id="ieqn-100"><mml:math id="mml-ieqn-100"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> score distribution box plot analysis for SSO and other optimization algorithms</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_73555-fig-21.tif"/>
</fig>
<p>The statistical analysis confirms the robustness and reliability of the proposed optimization framework. The ANOVA results revealed highly significant differences among the tested optimization algorithms, with an F-statistic of 7.17 and a <italic>p</italic>-value below the significance threshold of <inline-formula id="ieqn-97"><mml:math id="mml-ieqn-97"><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.05</mml:mn></mml:math></inline-formula>, indicating that the observed improvements are unlikely to be the result of random chance. Complementary validation using the Wilcoxon signed-rank test corroborated this finding, as all optimized CatBoost models exhibited statistically significant performance gains (<inline-formula id="ieqn-98"><mml:math id="mml-ieqn-98"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.001</mml:mn></mml:math></inline-formula>). Among them, SSO-CatBoost achieved the highest median <inline-formula id="ieqn-99"><mml:math id="mml-ieqn-99"><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:math></inline-formula> values and the smallest variability ranges, demonstrating superior predictive accuracy and stability. Collectively, these statistical tests substantiate the effectiveness of metaheuristic optimization for enhancing machine learning-based concrete strength prediction, with SSO emerging as the most promising approach.</p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Discussion</title>
<p>This study demonstrates the effectiveness of integrating the Somersaulting Spider Optimizer (SSO) with the CatBoost machine learning algorithm for predicting concrete compressive strength. The proposed hybrid framework effectively balances exploration and exploitation within the hyperparameter space, enabling the identification of optimal parameter combinations that substantially enhance predictive performance. The adaptive energy management mechanism embedded in the SSO algorithm facilitates dynamic search behavior, effectively avoiding premature convergence to local minima&#x2014;a common limitation of traditional optimization approaches. Combined with CatBoost&#x2019;s capabilities for handling categorical and numerical features through ordered boosting and symmetric decision tree structures, the model successfully captures complex nonlinear relationships among concrete mix proportions, curing age, and compressive strength. As shown in <xref ref-type="table" rid="table-5">Table 5</xref>, the SSO-CatBoost model achieved the lowest mean squared error of 12.61 and the highest coefficient of determination (R<sup>2</sup>) of 0.951 among all optimized models, representing a 24.6% improvement over the baseline CatBoost model presented in <xref ref-type="table" rid="table-4">Table 4</xref>. This performance improvement is further illustrated in <xref ref-type="fig" rid="fig-17">Fig. 17</xref>, which visualizes the comparative advantage through color intensity mapping where SSO-CatBoost exhibits the lightest coloration for error metrics. The enhanced prediction accuracy enables more refined concrete mixture designs that minimize material consumption while maximizing structural reliability, potentially leading to significant cost reductions in large-scale concrete production.</p>

<p>Feature importance analysis, as presented in <xref ref-type="fig" rid="fig-7">Fig. 7</xref>, indicates that cement content and concrete age exert the strongest influence on compressive strength prediction, consistent with established knowledge regarding cement hydration kinetics and strength development mechanisms. Secondary yet significant contributions from water content, supplementary cementitious materials, and chemical admixtures underscore the multifaceted nature of mix design effects on concrete performance, as evidenced by the correlation patterns shown in <xref ref-type="fig" rid="fig-2">Fig. 2</xref>. The statistical significance of the SSO-CatBoost model&#x2019;s superiority is confirmed through ANOVA results presented in <xref ref-type="table" rid="table-6">Table 6</xref>, which yielded an F-statistic of 7.17 with <italic>p</italic>-value &#x003C; 0.0001, and the Wilcoxon signed-rank test outcomes in <xref ref-type="table" rid="table-7">Table 7</xref>, which established that SSO-CatBoost achieved the highest actual median (0.945) with statistical significance (<italic>p</italic> &#x003C; 0.001). This trend is further supported by the R<sup>2</sup> distribution boxplots in <xref ref-type="fig" rid="fig-21">Fig. 21</xref>, which demonstrates SSO-CatBoost&#x2019;s highest median R<sup>2</sup> score (0.95) with the smallest interquartile range, and the radar comparison in <xref ref-type="fig" rid="fig-18">Fig. 18</xref>, which collectively demonstrate consistent outperformance across multiple evaluation metrics. The cumulative distribution functions presented in <xref ref-type="fig" rid="fig-15">Fig. 15</xref> further confirm the concentrated low-error predictions achieved by the optimization framework. Although the dataset encompasses diverse mix proportions without exhaustive material property characterization, the methodology remains highly applicable to industry-standard mix design processes. 
Overall, this study provides a transparent and reproducible framework while generating actionable insights for concrete mixture optimization and the development of sustainable, resilient infrastructure systems.</p>

<p>According to the No Free Lunch theorem, the strong performance of the SSO-CatBoost framework is specific to the concrete compressive strength dataset and cannot be assumed to generalize universally. When considering new datasets, alternative material systems, or different prediction objectives, it is necessary to re-evaluate and benchmark the framework to ensure effectiveness in each new context.</p>
</sec>
<sec id="s6">
<label>6</label>
<title>Conclusion and Future Work</title>
<p>This study demonstrated a novel concrete compressive strength prediction strategy based on the fusion of the Somersaulting Spider Optimizer (SSO) and the CatBoost model to address the relevant and considerable issue of hyperparameter optimization in machine learning-guided solutions of concrete engineering. The experimentation demonstrated the effectiveness of bio-inspired optimization techniques for enhancing the accuracy of prediction of compound material characteristics. The developed SSO algorithm, built on the spider&#x2019;s characteristic locomotion behavior, provided a new balance of exploratory and exploitative strategies, employing adaptive energy management and dual-phase movement strategies. The somersaulting and rolling motions of the algorithm, along with the stagnation-based intensity adjustment, are more efficient in comparison to conventional optimization procedures. The hyperparameter optimization was grounded in a solid mathematical foundation of time-dependent adaptive factors and energy-based behavioral switching models.</p>
<p>Comparative analysis against seven well-known metaheuristic optimization methods (Differential Evolution, Grey Wolf Optimizer, Whale Optimization Algorithm, Genetic Algorithm, Bat Algorithm, Harris Hawks Optimization and Particle Swarm Optimization) demonstrated that the SSO was effective across a wide scope of performance indicators. The SSO was never outperformed by the competing algorithms in convergence speed, solution quality, or robustness, demonstrating that the algorithm has a broader range of applications in engineering optimization problems.</p>
<p>The Analysis of Variance (ANOVA) and the Wilcoxon signed-rank tests confirmed the statistical significance of the performance improvements brought about by the implementation of the proposed methodology. This rigorous statistical assessment provides confidence not only in the precision of the algorithm but also in its practical applicability to real-world engineering problems.</p>
<p>Several future research directions can be identified for the current work. Continued development of the SSO algorithm would enable the concurrent optimization of multiple concrete properties, across such measures as strength, durability, workability and sustainability. Applying the algorithm to larger datasets comprising various geographical settings, material sources and construction techniques would further enhance its generalizability.</p>
<p>A promising industrial route is the real-time deployment of the SSO-CatBoost system in concrete production facilities to monitor product quality and adjust mix compositions on the fly. In addition, extending the methodology to forecast other concrete properties, such as service life, permeability, and thermal properties, would widen its scope of use. A limitation is that the study does not perform sensitivity analysis on all hyperparameters.</p>
</sec>
</body>
<back>
<ack>
<p>Princess Nourah bint Abdulrahman University Researchers Supporting Project, Princess Nourah bint Abdulrahman University, Riyadh, Saudi Arabia.</p>
</ack>
<sec>
<title>Funding Statement</title>
<p>Princess Nourah bint Abdulrahman University Researchers Supporting Project number (PNURSP2025R308), Princess Nourah bint Abdulrahman University, Riyadh, Saudi Arabia.</p>
</sec>
<sec>
<title>Author Contributions</title>
<p>The authors confirm contribution to the paper as follows: Conceptualization, Marwa M. Eid and Amel Ali Alhussan; methodology, El-Sayed M. El-Kenawy and Ebrahim A. Mattar; software, El-Sayed M. El-Kenawy; validation, El-Sayed M. El-Kenawy, Amel Ali Alhussan, and Nima Khodadadi; formal analysis, Ebrahim A. Mattar and El-Sayed M. El-Kenawy; investigation, Marwa M. Eid and El-Sayed M. El-Kenawy; resources, Amel Ali Alhussan and Ebrahim A. Mattar; data curation, Nima Khodadadi and El-Sayed M. El-Kenawy; writing&#x2014;original draft preparation, Marwa M. Eid and El-Sayed M. El-Kenawy; writing&#x2014;review and editing, Amel Ali Alhussan, Ebrahim A. Mattar, and Nima Khodadadi; visualization, Nima Khodadadi; supervision, Marwa M. Eid; project administration, Nima Khodadadi; funding acquisition, El-Sayed M. El-Kenawy and Amel Ali Alhussan. All authors reviewed the results and approved the final version of the manuscript.</p>
</sec>
<sec sec-type="data-availability">
<title>Availability of Data and Materials</title>
<p>The dataset used in this study is publicly available on Kaggle at <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/prathamtripathi/regression-with-neural-networking">https://www.kaggle.com/datasets/prathamtripathi/regression-with-neural-networking</ext-link> (accessed on 30 July 2025).</p>
</sec>
<sec>
<title>Ethics Approval</title>
<p>Not applicable.</p>
</sec>
<sec sec-type="COI-statement">
<title>Conflicts of Interest</title>
<p>The authors declare no conflicts of interest to report regarding the present study.</p>
</sec>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Feng</surname> <given-names>DC</given-names></string-name>, <string-name><surname>Liu</surname> <given-names>ZT</given-names></string-name>, <string-name><surname>Wang</surname> <given-names>XD</given-names></string-name>, <string-name><surname>Chen</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Chang</surname> <given-names>JQ</given-names></string-name>, <string-name><surname>Wei</surname> <given-names>DF</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Machine learning-based compressive strength prediction for concrete: an adaptive boosting approach</article-title>. <source>Constr Build Mater</source>. <year>2020</year>;<volume>230</volume>(<issue>3</issue>):<fpage>117000</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.conbuildmat.2019.117000</pub-id>.</mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Nguyen-Sy</surname> <given-names>T</given-names></string-name></person-group>. <article-title>Optimized hybrid XGBoost-CatBoost model for enhanced prediction of concrete strength and reliability analysis using Monte Carlo simulations</article-title>. <source>Appl Soft Comput</source>. <year>2024</year>;<volume>167</volume>(<issue>8</issue>):<fpage>112490</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.asoc.2024.112490</pub-id>.</mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhang</surname> <given-names>W</given-names></string-name>, <string-name><surname>Wang</surname> <given-names>D</given-names></string-name></person-group>. <article-title>Damage identification using deep learning and long-gauge fiber Bragg grating sensors</article-title>. <source>Appl Opt</source>. <year>2020</year>;<volume>59</volume>(<issue>33</issue>):<fpage>10532</fpage>&#x2013;<lpage>40</lpage>. doi:<pub-id pub-id-type="doi">10.1364/ao.405110</pub-id>; <pub-id pub-id-type="pmid">33361988</pub-id></mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Lyngdoh</surname> <given-names>GA</given-names></string-name>, <string-name><surname>Li</surname> <given-names>H</given-names></string-name>, <string-name><surname>Zaki</surname> <given-names>M</given-names></string-name>, <string-name><surname>Krishnan</surname> <given-names>NMA</given-names></string-name>, <string-name><surname>Das</surname> <given-names>S</given-names></string-name></person-group>. <article-title>Elucidating the constitutive relationship of calcium-silicate-hydrate gel using high throughput reactive molecular simulations and machine learning</article-title>. <source>Sci Rep</source>. <year>2020</year>;<volume>10</volume>(<issue>1</issue>):<fpage>21336</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-020-78368-1</pub-id>; <pub-id pub-id-type="pmid">33288786</pub-id></mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Taffese</surname> <given-names>WZ</given-names></string-name>, <string-name><surname>Nigussie</surname> <given-names>E</given-names></string-name></person-group>. <article-title>Autonomous corrosion assessment of reinforced concrete structures: feasibility study</article-title>. <source>Sensors</source>. <year>2020</year>;<volume>20</volume>(<issue>23</issue>):<fpage>6825</fpage>. doi:<pub-id pub-id-type="doi">10.3390/s20236825</pub-id>.</mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhang</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Ren</surname> <given-names>W</given-names></string-name>, <string-name><surname>Lei</surname> <given-names>J</given-names></string-name>, <string-name><surname>Sun</surname> <given-names>L</given-names></string-name>, <string-name><surname>Mi</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Chen</surname> <given-names>Y</given-names></string-name></person-group>. <article-title>Predicting the compressive strength of high-performance concrete via the DR-CatBoost model</article-title>. <source>Case Stud Constr Mater</source>. <year>2024</year>;<volume>21</volume>:<fpage>e03990</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.cscm.2024.e03990</pub-id>.</mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Bhowmick</surname> <given-names>S</given-names></string-name>, <string-name><surname>Nagarajaiah</surname> <given-names>S</given-names></string-name>, <string-name><surname>Veeraraghavan</surname> <given-names>A</given-names></string-name></person-group>. <article-title>Vision and deep learning-based algorithms to detect and quantify cracks on concrete surfaces from UAV videos</article-title>. <source>Sensors</source>. <year>2020</year>;<volume>20</volume>(<issue>21</issue>):<fpage>6299</fpage>. doi:<pub-id pub-id-type="doi">10.3390/s20216299</pub-id>; <pub-id pub-id-type="pmid">33167411</pub-id></mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Rao</surname> <given-names>PR</given-names></string-name>, <string-name><surname>Harika</surname> <given-names>R</given-names></string-name>, <string-name><surname>Dass</surname> <given-names>ZSC</given-names></string-name></person-group>. <article-title>A comprehensive review on the application of machine learning models in concrete strength prediction</article-title>. <source>J Phys: Conf Ser</source>. <year>2025</year>;<volume>3076</volume>(<issue>1</issue>):<fpage>012015</fpage>. doi:<pub-id pub-id-type="doi">10.1088/1742-6596/3076/1/012015</pub-id>.</mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Singh</surname> <given-names>S</given-names></string-name>, <string-name><surname>Patro</surname> <given-names>SK</given-names></string-name>, <string-name><surname>Parhi</surname> <given-names>SK</given-names></string-name></person-group>. <article-title>Evolutionary optimization of machine learning algorithm hyperparameters for strength prediction of high-performance concrete</article-title>. <source>Asian J Civil Eng</source>. <year>2023</year>;<volume>24</volume>(<issue>8</issue>):<fpage>3121</fpage>&#x2013;<lpage>43</lpage>. doi:<pub-id pub-id-type="doi">10.1007/s42107-023-00698-y</pub-id>.</mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Tang</surname> <given-names>F</given-names></string-name>, <string-name><surname>Wu</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Zhou</surname> <given-names>Y</given-names></string-name></person-group>. <article-title>Hybridizing grid search and support vector regression to predict the compressive strength of fly ash concrete</article-title>. <source>Adv Civil Eng</source>. <year>2022</year>;<volume>2022</volume>(<issue>1</issue>):<fpage>3601914</fpage>. doi:<pub-id pub-id-type="doi">10.1155/2022/3601914</pub-id>.</mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Van Mullem</surname> <given-names>T</given-names></string-name>, <string-name><surname>Anglani</surname> <given-names>G</given-names></string-name>, <string-name><surname>Dudek</surname> <given-names>M</given-names></string-name>, <string-name><surname>Vanoutrive</surname> <given-names>H</given-names></string-name>, <string-name><surname>Bumanis</surname> <given-names>G</given-names></string-name>, <string-name><surname>Litina</surname> <given-names>C</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Addressing the need for standardization of test methods for self-healing concrete: an inter-laboratory study on concrete with macrocapsules</article-title>. <source>Sci Technol Adv Mater</source>. <year>2020</year>;<volume>21</volume>(<issue>1</issue>):<fpage>661</fpage>&#x2013;<lpage>82</lpage>. doi:<pub-id pub-id-type="doi">10.1080/14686996.2020.1814117</pub-id>; <pub-id pub-id-type="pmid">33061839</pub-id></mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhang</surname> <given-names>G</given-names></string-name>, <string-name><surname>Ali</surname> <given-names>ZH</given-names></string-name>, <string-name><surname>Aldlemy</surname> <given-names>MS</given-names></string-name>, <string-name><surname>Mussa</surname> <given-names>MH</given-names></string-name>, <string-name><surname>Salih</surname> <given-names>SQ</given-names></string-name>, <string-name><surname>Hameed</surname> <given-names>MM</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Reinforced concrete deep beam shear strength capacity modelling using an integrative bio-inspired algorithm with an artificial intelligence model</article-title>. <source>Engineering with Computers</source>. <year>2022</year>;<volume>38</volume>(<issue>Suppl 1</issue>):<fpage>15</fpage>&#x2013;<lpage>28</lpage>. doi:<pub-id pub-id-type="doi">10.1007/s00366-020-01137-1</pub-id>.</mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Lu</surname> <given-names>S</given-names></string-name>, <string-name><surname>Koopialipoor</surname> <given-names>M</given-names></string-name>, <string-name><surname>Asteris</surname> <given-names>PG</given-names></string-name>, <string-name><surname>Bahri</surname> <given-names>M</given-names></string-name>, <string-name><surname>Armaghani</surname> <given-names>DJ</given-names></string-name></person-group>. <article-title>A novel feature selection approach based on tree models for evaluating the punching shear capacity of steel fiber-reinforced concrete flat slabs</article-title>. <source>Materials</source>. <year>2020</year>;<volume>13</volume>(<issue>17</issue>):<fpage>3902</fpage>. doi:<pub-id pub-id-type="doi">10.3390/ma13173902</pub-id>; <pub-id pub-id-type="pmid">32899331</pub-id></mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Imran</surname> <given-names>M</given-names></string-name>, <string-name><surname>Khushnood</surname> <given-names>RA</given-names></string-name>, <string-name><surname>Fawad</surname> <given-names>M</given-names></string-name></person-group>. <article-title>A hybrid data-driven and metaheuristic optimization approach for the compressive strength prediction of high-performance concrete</article-title>. <source>Case Stud Constr Mater</source>. <year>2023</year>;<volume>18</volume>(<issue>12</issue>):<fpage>e01890</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.cscm.2023.e01890</pub-id>.</mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Billah</surname> <given-names>UH</given-names></string-name>, <string-name><surname>La</surname> <given-names>HM</given-names></string-name>, <string-name><surname>Tavakkoli</surname> <given-names>A</given-names></string-name></person-group>. <article-title>Deep learning-based feature silencing for accurate concrete crack detection</article-title>. <source>Sensors</source>. <year>2020</year>;<volume>20</volume>(<issue>16</issue>):<fpage>4403</fpage>. doi:<pub-id pub-id-type="doi">10.20944/preprints202007.0474.v1</pub-id>.</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Nguyen</surname> <given-names>QH</given-names></string-name>, <string-name><surname>Ly</surname> <given-names>HB</given-names></string-name>, <string-name><surname>Tran</surname> <given-names>VQ</given-names></string-name>, <string-name><surname>Nguyen</surname> <given-names>TA</given-names></string-name>, <string-name><surname>Phan</surname> <given-names>VH</given-names></string-name>, <string-name><surname>Le</surname> <given-names>TT</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>A novel hybrid model based on a feedforward neural network and one step secant algorithm for prediction of load-bearing capacity of rectangular concrete-filled steel tube columns</article-title>. <source>Molecules</source>. <year>2020</year>;<volume>25</volume>(<issue>15</issue>):<fpage>3486</fpage>. doi:<pub-id pub-id-type="doi">10.3390/molecules25153486</pub-id>; <pub-id pub-id-type="pmid">32751914</pub-id></mixed-citation></ref>
<ref id="ref-17"><label>[17]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Li</surname> <given-names>P</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>Z</given-names></string-name>, <string-name><surname>Gu</surname> <given-names>J</given-names></string-name></person-group>. <article-title>Prediction of concrete compressive strength based on ISSA-BPNN-AdaBoost</article-title>. <source>Materials</source>. <year>2024</year>;<volume>17</volume>(<issue>23</issue>):<fpage>5727</fpage>. doi:<pub-id pub-id-type="doi">10.3390/ma17235727</pub-id>.</mixed-citation></ref>
<ref id="ref-18"><label>[18]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhang</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Ren</surname> <given-names>W</given-names></string-name>, <string-name><surname>Chen</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Mi</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Lei</surname> <given-names>J</given-names></string-name>, <string-name><surname>Sun</surname> <given-names>L</given-names></string-name></person-group>. <article-title>Predicting the compressive strength of high-performance concrete using an interpretable machine learning model</article-title>. <source>Sci Rep</source>. <year>2024</year>;<volume>14</volume>(<issue>1</issue>):<fpage>28346</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-024-79502-z</pub-id>; <pub-id pub-id-type="pmid">39550464</pub-id></mixed-citation></ref>
<ref id="ref-19"><label>[19]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Guzm&#x00E1;n-Torres</surname> <given-names>JA</given-names></string-name>, <string-name><surname>Dominguez-Mota</surname> <given-names>FJ</given-names></string-name>, <string-name><surname>Alonso-Guzm&#x00E1;n</surname> <given-names>EM</given-names></string-name>, <string-name><surname>Tinoco-Guerrero</surname> <given-names>G</given-names></string-name>, <string-name><surname>Martinez-Molina</surname> <given-names>W</given-names></string-name></person-group>. <article-title>ConcreteXAI: a multivariate dataset for concrete strength prediction via deep-learning-based methods</article-title>. <source>Data Brief</source>. <year>2024</year>;<volume>53</volume>(<issue>8</issue>):<fpage>110218</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.dib.2024.110218</pub-id>; <pub-id pub-id-type="pmid">38425877</pub-id></mixed-citation></ref>
<ref id="ref-20"><label>[20]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Javed</surname> <given-names>MF</given-names></string-name>, <string-name><surname>Fawad</surname> <given-names>M</given-names></string-name>, <string-name><surname>Lodhi</surname> <given-names>R</given-names></string-name>, <string-name><surname>Najeh</surname> <given-names>T</given-names></string-name>, <string-name><surname>Gamil</surname> <given-names>Y</given-names></string-name></person-group>. <article-title>Forecasting the strength of preplaced aggregate concrete using interpretable machine learning approaches</article-title>. <source>Sci Rep</source>. <year>2024</year>;<volume>14</volume>(<issue>1</issue>):<fpage>8381</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-024-57896-0</pub-id>; <pub-id pub-id-type="pmid">38600161</pub-id></mixed-citation></ref>
<ref id="ref-21"><label>[21]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Hasan</surname> <given-names>MR</given-names></string-name>, <string-name><surname>Shuvo</surname> <given-names>AK</given-names></string-name>, <string-name><surname>Pranto</surname> <given-names>EB</given-names></string-name>, <string-name><surname>Hasan</surname> <given-names>M</given-names></string-name>, <string-name><surname>Miah</surname> <given-names>MM</given-names></string-name></person-group>. <article-title>Data-driven prediction of concrete strength by machine learning: hybrid-fiber-reinforced recycled aggregate concrete</article-title>. <source>World J Eng</source>. <year>2025</year>;<volume>41</volume>(<issue>4</issue>):<fpage>1183</fpage>. doi:<pub-id pub-id-type="doi">10.1108/wje-01-2025-0038</pub-id>.</mixed-citation></ref>
<ref id="ref-22"><label>[22]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Liu</surname> <given-names>Y</given-names></string-name></person-group>. <article-title>High-performance concrete strength prediction based on machine learning</article-title>. <source>Comput Intell Neurosci</source>. <year>2022</year>;<volume>2022</volume>:<fpage>1</fpage>&#x2013;<lpage>7</lpage>. doi:<pub-id pub-id-type="doi">10.1155/2022/5802217</pub-id>; <pub-id pub-id-type="pmid">35669631</pub-id></mixed-citation></ref>
<ref id="ref-23"><label>[23]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Qi</surname> <given-names>C</given-names></string-name>, <string-name><surname>Huang</surname> <given-names>B</given-names></string-name>, <string-name><surname>Wu</surname> <given-names>M</given-names></string-name>, <string-name><surname>Wang</surname> <given-names>K</given-names></string-name>, <string-name><surname>Yang</surname> <given-names>S</given-names></string-name>, <string-name><surname>Li</surname> <given-names>G</given-names></string-name></person-group>. <article-title>Concrete strength prediction using different machine learning processes: effect of slag, fly ash and superplasticizer</article-title>. <source>Materials</source>. <year>2022</year>;<volume>15</volume>(<issue>15</issue>):<fpage>5369</fpage>. doi:<pub-id pub-id-type="doi">10.3390/ma15155369</pub-id>; <pub-id pub-id-type="pmid">35955301</pub-id></mixed-citation></ref>
<ref id="ref-24"><label>[24]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Mirzaei</surname> <given-names>A</given-names></string-name>, <string-name><surname>Aghsami</surname> <given-names>A</given-names></string-name></person-group>. <article-title>A hybrid deep reinforcement learning architecture for optimizing concrete mix design through precision strength prediction</article-title>. <source>Math Comput Appl</source>. <year>2025</year>;<volume>30</volume>(<issue>4</issue>):<fpage>83</fpage>. doi:<pub-id pub-id-type="doi">10.3390/mca30040083</pub-id>.</mixed-citation></ref>
<ref id="ref-25"><label>[25]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Nwetlawung</surname> <given-names>ZE</given-names></string-name>, <string-name><surname>Lin</surname> <given-names>YH</given-names></string-name></person-group>. <article-title>Development of an optimization algorithm for designing low-carbon concrete materials standardization with blockchain technology and ensemble machine learning methods</article-title>. <source>Buildings</source>. <year>2025</year>;<volume>15</volume>(<issue>16</issue>):<fpage>2809</fpage>. doi:<pub-id pub-id-type="doi">10.3390/buildings15162809</pub-id>.</mixed-citation></ref>
<ref id="ref-26"><label>[26]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Gazi</surname> <given-names>MU</given-names></string-name>, <string-name><surname>Hasan</surname> <given-names>MT</given-names></string-name>, <string-name><surname>Debnath</surname> <given-names>P</given-names></string-name></person-group>. <article-title>Few-shot meta-learning for concrete strength prediction: a model-agnostic approach with SHAP analysis</article-title>. <source>Innov Infrastruct Solut</source>. <year>2025</year>;<volume>4</volume>(<issue>1</issue>):<fpage>20</fpage>. doi:<pub-id pub-id-type="doi">10.1007/s43503-025-00064-8</pub-id>.</mixed-citation></ref>
<ref id="ref-27"><label>[27]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zaki</surname> <given-names>AM</given-names></string-name>, <string-name><surname>Nafea</surname> <given-names>HB</given-names></string-name>, <string-name><surname>Moustafa</surname> <given-names>HED</given-names></string-name>, <string-name><surname>El-Kenawy</surname> <given-names>ESM</given-names></string-name></person-group>. <article-title>Optimizing earthquake prediction accuracy using somersaulting spider optimizer for dynamic ensemble weighting</article-title>. <source>J Intell Syst Internet of Things</source>. <year>2026</year>;<volume>2</volume>:<fpage>386</fpage>&#x2013;<lpage>409</lpage>.</mixed-citation></ref>
<ref id="ref-28"><label>[28]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zaki</surname> <given-names>AM</given-names></string-name>, <string-name><surname>Nafea</surname> <given-names>HB</given-names></string-name>, <string-name><surname>Moustafa</surname> <given-names>HED</given-names></string-name>, <string-name><surname>El-Kenawy</surname> <given-names>ESM</given-names></string-name></person-group>. <article-title>Somersaulting spider optimizer (SSO): a nature-inspired metaheuristic algorithm for engineering optimization problems</article-title>. <source>J Artif Intell Metaheuristics</source>. <year>2025</year>;<volume>10</volume>(<issue>1</issue>):<fpage>91</fpage>&#x2013;<lpage>120</lpage>. doi:<pub-id pub-id-type="doi">10.54216/JAIM.100105</pub-id>.</mixed-citation></ref>
<ref id="ref-29"><label>[29]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Beskopylny</surname> <given-names>AN</given-names></string-name>, <string-name><surname>Stel&#x2019;makh</surname> <given-names>SA</given-names></string-name>, <string-name><surname>Shcherban&#x2019;</surname> <given-names>EM</given-names></string-name>, <string-name><surname>Mailyan</surname> <given-names>LR</given-names></string-name>, <string-name><surname>Meskhi</surname> <given-names>B</given-names></string-name>, <string-name><surname>Razveeva</surname> <given-names>I</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Concrete strength prediction using machine learning methods CatBoost, k-nearest neighbors, support vector regression</article-title>. <source>Appl Sci</source>. <year>2022</year>;<volume>12</volume>(<issue>21</issue>):<fpage>10864</fpage>. doi:<pub-id pub-id-type="doi">10.3390/app122110864</pub-id>.</mixed-citation></ref>
<ref id="ref-30"><label>[30]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Abbood</surname> <given-names>IS</given-names></string-name>, <string-name><surname>Rahman</surname> <given-names>NA</given-names></string-name>, <string-name><surname>Bakar</surname> <given-names>BHA</given-names></string-name></person-group>. <article-title>Shear strength prediction for RCDBs utilizing data-driven machine learning approach: enhanced CatBoost with SHAP and PDPs analyses</article-title>. <source>Appl Syst Innov</source>. <year>2025</year>;<volume>8</volume>(<issue>4</issue>):<fpage>96</fpage>. doi:<pub-id pub-id-type="doi">10.3390/asi8040096</pub-id>.</mixed-citation></ref>
</ref-list>
</back></article>










