<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xml:lang="en" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMC</journal-id>
<journal-id journal-id-type="nlm-ta">CMC</journal-id>
<journal-id journal-id-type="publisher-id">CMC</journal-id>
<journal-title-group>
<journal-title>Computers, Materials &#x0026; Continua</journal-title>
</journal-title-group>
<issn pub-type="epub">1546-2226</issn>
<issn pub-type="ppub">1546-2218</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">35710</article-id>
<article-id pub-id-type="doi">10.32604/cmc.2023.035710</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>An Automated System for Early Prediction of Miscarriage in the First Trimester Using Machine Learning</article-title>
<alt-title alt-title-type="left-running-head">An Automated System for Early Prediction of Miscarriage in the First Trimester Using Machine Learning</alt-title>
<alt-title alt-title-type="right-running-head">An Automated System for Early Prediction of Miscarriage in the First Trimester Using Machine Learning</alt-title>
</title-group>
<contrib-group>
<contrib id="author-1" contrib-type="author">
<name name-style="western"><surname>Aljameel</surname><given-names>Sumayh S.</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western"><surname>Aljabri</surname><given-names>Malak</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref>
<xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western"><surname>Aslam</surname><given-names>Nida</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-4" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Alomari</surname><given-names>Dorieh M.</given-names>
</name><xref ref-type="aff" rid="aff-3">3</xref><email>2180007089@iau.edu.sa</email></contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western"><surname>Alyahya</surname><given-names>Arwa</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-6" contrib-type="author">
<name name-style="western"><surname>Alfaris</surname><given-names>Shaykhah</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-7" contrib-type="author">
<name name-style="western"><surname>Balharith</surname><given-names>Maha</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-8" contrib-type="author">
<name name-style="western"><surname>Abahussain</surname><given-names>Hiessa</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-9" contrib-type="author">
<name name-style="western"><surname>Boujlea</surname><given-names>Dana</given-names>
</name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-10" contrib-type="author">
<name name-style="western"><surname>Alsulmi</surname><given-names>Eman S.</given-names>
</name><xref ref-type="aff" rid="aff-4">4</xref></contrib>
<aff id="aff-1"><label>1</label><institution>Department of Computer Science, College of Computer Science and Information Technology, Imam Abdulrahman Bin Faisal University, P.O. Box 1982</institution>, <addr-line>Dammam, 31441</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-2"><label>2</label><institution>Computer Science Department, College of Computer and Information Systems, Umm Al-Qura University</institution>, <addr-line>Makkah, 21955</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-3"><label>3</label><institution>Department of Computer Engineering, College of Computer Science and Information Technology, Imam Abdulrahman Bin Faisal University, P.O. Box 1982</institution>, <addr-line>Dammam, 31441</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-4"><label>4</label><institution>Department of Obstetrics and Gynecology, College of Medicine, Imam Abdulrahman Bin Faisal University</institution>, <addr-line>Dammam</addr-line>, <country>Saudi Arabia</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Author: Dorieh M. Alomari. Email: <email>2180007089@iau.edu.sa</email></corresp>
</author-notes>
<pub-date date-type="collection" publication-format="electronic"><year>2023</year></pub-date>
<pub-date date-type="pub" publication-format="electronic"><day>24</day><month>1</month><year>2023</year></pub-date>
<volume>75</volume>
<issue>1</issue>
<fpage>1291</fpage>
<lpage>1304</lpage>
<history>
<date date-type="received"><day>01</day><month>9</month><year>2022</year></date>
<date date-type="accepted"><day>08</day><month>12</month><year>2022</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2023 Aljameel et al.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Aljameel et al.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMC_35710.pdf"></self-uri>
<abstract><p>Currently, the risk factors of pregnancy loss are increasing and are considered a major challenge because they vary between cases. The early prediction of miscarriage can help pregnant women to take the needed care and avoid any danger. Therefore, an intelligent automated solution must be developed to predict the risk factors for pregnancy loss at an early stage to assist with accurate and effective diagnosis. Machine learning (ML)-based decision support systems are increasingly used in the healthcare sector and have achieved notable performance and objectiveness in disease prediction and prognosis. Thus, we developed a model to help obstetricians predict the probability of miscarriage using ML and to support their decisions and expectations about pregnancy status by providing an easy, automated way to predict miscarriage at early stages using ML tools and techniques. Although many published papers proposed similar models, none of them used Saudi clinical data. Our proposed solution used ML classification algorithms to build a miscarriage prediction model. Four classifiers were used in this study: decision tree (DT), random forest (RF), k-nearest neighbor (KNN), and gradient boosting (GB). Accuracy, Precision, Recall, F1-score, and receiver operating characteristic area under the curve (ROC-AUC) were used to evaluate the proposed model. The results showed that GB outperformed the other classifiers with an accuracy of 93.4% and ROC-AUC of 97%. This proposed model can assist in the early identification of at-risk pregnant women to avoid miscarriage in the first trimester and will improve the healthcare sector in Saudi Arabia.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Miscarriage</kwd>
<kwd>pregnancy</kwd>
<kwd>abortion</kwd>
<kwd>machine learning</kwd>
<kwd>gradient boosting</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label><title>Introduction</title>
<p>Pregnancy loss is a common phenomenon; eight out of ten pregnancy losses occur within the first 20 weeks of the pregnancy (the first trimester), which is known as miscarriage [<xref ref-type="bibr" rid="ref-1">1</xref>]. Pregnancy loss has a great negative impact on the number of new births in the world and on the physical and emotional health of women. Furthermore, it can be a serious problem if no action was taken early. Doctors face some difficulties in predicting miscarriage at the early stages and this prevents them from taking the proper actions to avoid its risk. While the early prediction of miscarriage will help doctors and patients to take the needed care and protect the embryo from being lost [<xref ref-type="bibr" rid="ref-1">1</xref>]. Pregnancy loss occurs for multiple reasons that can be related to both physical and psychological health, making it difficult for doctors to identify the leading cause. According to related works and studies, several indicators can cause miscarriage. These predictors include sociodemographic factors, such as age (becoming pregnant at different ages may affect pregnancy status); occupation (some occupations may require more physical exertion than others); and factors related to a pregnant woman&#x2019;s existing health conditions, such as high or low body mass index (BMI) (obese or underweight), high blood pressure, diabetes, cancer, infertility, and other diseases or conditions. In addition, women can experience health conditions that occur only during pregnancy, such as gestational diabetes, preeclampsia, and eclampsia [<xref ref-type="bibr" rid="ref-2">2</xref>,<xref ref-type="bibr" rid="ref-3">3</xref>]. Numerous studies have investigated pregnancy loss using artificial intelligence (AI) and ML to predict the risk factors of miscarriage by collecting data from pregnant women or women who had experienced pregnancy loss [<xref ref-type="bibr" rid="ref-4">4</xref>]. 
This study developed ML prediction models to help obstetricians make accurate and timely decisions to avoid the risk of miscarriage. The proposed model in this study will be a great enhancement of the healthcare field and will help doctors to take the needed process to reduce miscarriage possibility. Furthermore, this study examined the impact of routine screening blood tests, including the levels of different types of white blood cells (WBCs), red blood cells (RBCs), and platelets, as well as sociodemographic features, including the age, weight, and height. To achieve the research objectives, we obtained clinical data from King Fahad University Hospital, Khobar, Kingdom of Saudi Arabia (KSA). The data contained the records of patients who had miscarriages or normal deliveries. The proposed model can be used as a tool that can identify early signs of at-risk pregnancy.</p>
<p>The main contributions of the study are as follows:
<list list-type="bullet">
<list-item>
<p>To the authors&#x2019; knowledge, this study is the first to predict miscarriage using a dataset from Saudi Arabia.</p></list-item>
<list-item>
<p>The proposed model effectively predicted miscarriage at an early stage with a reduced number of features.</p></list-item>
<list-item>
<p>The model is a fully automated solution for predicting miscarriage.</p></list-item>
<list-item>
<p>The dataset is balanced.</p></list-item>
</list></p>
<p>The study is organized as follows. Section 2 discusses the previous studies related to the problem addressed in the current study. Section 3 describes the material and methods used in this study. Section 4 describes the experimental setup and reports the results. Finally, Section 5 concludes the paper.</p>
</sec>
<sec id="s2">
<label>2</label><title>Related Work</title>
<p>Mora-S&#x00E1;nchez&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-4">4</xref>] proposed an ML-based prediction model showing the relationship between recurrent miscarriage and the human leukocyte antigen (HLA) genes according to predicted values. Using a support vector machine (SVM) with a linear kernel as a classifier, the study detected the risk of recurrent miscarriage with an accuracy of 67% and an AUC of 71%. Similarly, Bruno&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-5">5</xref>] also used an SVM model to classify the risk level of patients with recurrent pregnancy loss (RPL). Using 43 features, the model obtained a balanced accuracy of 90.24% &#x00B1; 0.36%. Furthermore, the researchers developed a model using 18 features that obtained a balanced accuracy of 93.85% &#x00B1; 0.34%. However, the dataset contained many missing values for several features, such as age, BMI, activated protein C resistance (APCR), proteins C and S, antithrombin III (AT III), homocysteinemia, and thyroid stimulating hormone (TSH). These values were missing either because their assessment was not prescribed or because the values were within the normal range and thus not included.</p>
<p>Jhee&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-6">6</xref>] constructed models using ML classifiers to predict late-onset preeclampsia and compared these models with statistical methods. They used six classifiers: logistic regression (LR), DT, na&#x00EF;ve Bayes (NB), SVM, RF, and stochastic gradient boosting (SGB). The SGB model had the best performance, with an accuracy of 97.3%, a sensitivity of 60.3%, and a specificity of 99.1%. In addition, the prediction of late-onset preeclampsia using the ML algorithms surpassed that of the statistical methods. However, the study had limitations related to a lack of first-trimester data for most women who participated in the study because they started the antenatal exam after the early second trimester. Most previous studies have shown that women who develop preeclampsia in the second and third trimesters have significant maternal changes, although some reports have reported some changes in the first trimester. A major study limitation was that the number of patients with preeclampsia incidents was smaller than the number of patients in the control group; however, considering the study sample size, the number of patients with preeclampsia was suitable. Moreover, the sample size included in the study was larger than that of previous studies demonstrating the relationship between clinical biomarkers and the development of preeclampsia. An additional limitation concerned the antenatal evaluation; because of differences in patients&#x2019; symptoms and conditions, the evaluation intervals were diverse.</p>
<p>Another study by Alptekin&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-7">7</xref>] developed a prediction model for miscarriage based on first-trimester ultrasound findings and maternal characteristics for women with viable single pregnancies. Two models were created using DT to determine genetic abnormalities that could lead to miscarriage. The DT model achieved a sensitivity of 75%, a specificity of 93%, and an AUC of 0.87 &#x00B1; 0.02. However, this study examined only embryonic miscarriage (gestational age: 6&#x2013;9 weeks) and fetal miscarriage (gestational age: 10&#x2013;20 weeks), excluding preclinical or subclinical miscarriages (at or before a gestational age of 6 weeks).</p>
<p>Miyagi&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-8">8</xref>] proposed a prediction model for the probability of live birth using ML classifiers based on blastocyst images. Six ML methods were used: LR, NB, nearest neighbors, RF, neural network, and SVM. They used 80 images of blastocysts that led to living birth and 80 images that led to aneuploid miscarriages with fivefold cross-validation for classifying embryos. The study concluded that LR was the best classifier, with an AUC of 0.650 &#x00B1; 0.04, a sensitivity of 60%, a specificity of 70%, and an accuracy of 65%.</p>
<p>Another study by Malacova&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-9">9</xref>] developed a prediction model to support clinical decision-making by easily quantifying stillbirth risk using LR, RF, extreme gradient boosting (XGBoost), and regression tree classifiers. XGBoost outperformed the other classifiers, predicting 45% (95% CI: 43%, 46%) of stillbirths; furthermore, this model predicted 45% (95% CI: 43%, 47%) of stillbirths when pregnancy history was included. The authors noted the limitation of using perinatal records spanning more than three decades; the database changed over time, with more detailed predictor tools becoming available later in the study period. Similarly, Koivu&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-10">10</xref>] studied risk factors that could be utilized in a clinical setting. The ML classifiers used were LR, deep neural network, and gradient-boosting decision tree. The study used ML models as tools to generate risk prediction models and show the power of improved clinical prediction models. The models were used to predict both early and late stillbirth. For early stillbirth, both the LR model and the deep neural network model achieved an AUC of approximately 73% to 74%; for late stillbirth, the LR model achieved an AUC of 58% to 61%, and the deep neural network achieved an AUC of 54% to 57%. A limitation of this study was that the data contained observations from multiple years, regions, and hospitals.</p>
<p>Liu&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-11">11</xref>] developed a prediction model for embryonic development using six ML algorithms: LR, SVM, DT, back-propagation neural network (BNN), XGBoost, and RF. This model was developed to help doctors make more accurate decisions in clinical practice. The RF algorithm produced the best result; it had an accuracy of 97% when it included the fetal heart rate (FHR) feature and 99% when it included the embryo transfer (ETD) feature. Asri&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-12">12</xref>] created a prediction model with the Apache SPARK Databricks platform and ML through the K-means predictive model algorithm. The experimental results showed that the K-means algorithm performed well, clustering the data into three meaningful clusters. The algorithm predicted miscarriage in 44% of the sample would have a miscarriage, no miscarriage in 21% of the sample, and probable miscarriage in 34% of the sample. A limitation of this study was that better results could be obtained with a higher value of K because of the reduced squared error. However, a higher value of K may not have produced useful and meaningful clusters. Therefore, it was difficult to choose the correct number of clusters, and a result with 100% accuracy was rare because real data are complex. Another study by Mu&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-13">13</xref>] detected and classified the adverse outcomes of pregnancy before the participants became pregnant. Deep learning (DL) algorithms were applied using a multi-layer neural network (MLP) and DT. The researchers&#x2019; model provided an accuracy of 89.2%, outperforming two other models, a five-layer NN (85.9% accuracy) and a DT model (79.5% accuracy). <xref ref-type="table" rid="table-1">Table 1</xref>. summarizes the reviewed papers.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption><title>Summary of the related studies</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th>Reference</th>
<th>Objective</th>
<th>Technique</th>
<th>Dataset</th>
<th>Sample<break/> location</th>
<th>Sample<break/> Size</th>
<th>Gestational<break/> age</th>
<th>Results</th>
</tr>
</thead>
<tbody>
<tr>
<td>[<xref ref-type="bibr" rid="ref-4">4</xref>]</td>
<td>Analyzed HLA haplotypes from couples with either history of successful pregnancy or RM</td>
<td>SVM, linear kernel</td>
<td>Genetic data</td>
<td>Poland</td>
<td>190 samples</td>
<td>&#x2013;</td>
<td>Accuracy of 0.67, AUC of 0.71</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-5">5</xref>]</td>
<td>Classified the level of risk<break/> of patients with RPL in different risk classes</td>
<td>SVM</td>
<td>Clinical data with<break/> 43 medical<break/> features</td>
<td>Italy</td>
<td>734 samples</td>
<td>&#x2013;</td>
<td>Accuracy of 93.85%</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-6">6</xref>]</td>
<td>Developed models to<break/> predict late-onset preeclampsia and<break/> compared them to<break/> statistical methods</td>
<td>LR, DT, NB, SVM, RF,<break/> SGB</td>
<td>Clinical data collected from hospital electronic medical records</td>
<td>Korea</td>
<td>11,006<break/>samples</td>
<td>34 weeks</td>
<td>Accuracy of 97.3%</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-7">7</xref>]</td>
<td>Predicted miscarriage in women with a viable single pregnancy from first trimester</td>
<td>DT</td>
<td>Ultrasound features and clinical data</td>
<td>Turkey</td>
<td>415 samples</td>
<td>6&#x2013;10 weeks</td>
<td>AUC of 0.87 &#x00B1; 0.02</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-8">8</xref>]</td>
<td>Predicted the probability<break/> of achieving live birth</td>
<td>LR, RF, DT, NB, KNN, ANN</td>
<td>Blastocyst image</td>
<td>Japan</td>
<td>160<break/>images</td>
<td>&#x2013;</td>
<td>Accuracy of 65%</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-9">9</xref>]</td>
<td>Developed a prediction<break/> model to easily quantify stillbirth risks</td>
<td>DT, RF, <break/>GB</td>
<td>Clinical data with various medical features of the sample</td>
<td>Australia</td>
<td>947,025 livebirths and 5,788 stillbirths</td>
<td>&#x2265;20 weeks</td>
<td>Predicted 45% (95% CI: 43%, 46%) of stillbirths</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-10">10</xref>]</td>
<td>Identified unfamiliar risk models that could be<break/>utilized in a clinical setting</td>
<td>LR, DNN, GB</td>
<td>Clinical data containing various medical features of the tested sample</td>
<td>United States</td>
<td>364,124 samples</td>
<td>&#x2013;</td>
<td>AUC of 0.74</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-11">11</xref>]</td>
<td>Provided decision support for doctors who are<break/>relatively inexperienced in clinical practice</td>
<td>LR, RF, DNN, DT, SVM</td>
<td>Historical case data with six medical features</td>
<td>China</td>
<td>31,030 samples</td>
<td>First trimester (6&#x2013;12 weeks)</td>
<td>Accuracy of 99%</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-12">12</xref>]</td>
<td>Predicted real-time miscarriage</td>
<td>K-means</td>
<td>Medical data gathered from pregnant women via mobile phone</td>
<td>&#x2013;</td>
<td>100,000<break/>samples</td>
<td>&#x2013;</td>
<td>Error rate of 10%</td>
</tr>
<tr>
<td>[<xref ref-type="bibr" rid="ref-13">13</xref>]</td>
<td>Detected and classified adverse outcomes of pregnancy before<break/>becoming pregnant</td>
<td>DNN, DT</td>
<td>Clinical data containing various medical features of the tested sample</td>
<td>China</td>
<td>75,542<break/>samples</td>
<td>&#x2013;</td>
<td>Accuracy of 89.2%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Another study proposed a software-based solution to enhance the healthcare sector. Bhatti&#x00A0;et&#x00A0;al.&#x00A0;[<xref ref-type="bibr" rid="ref-14">14</xref>] developed an Android application for healthcare strengthening that improves the process of communication and collecting data between hospitals and reduces the consumption of time and effort of management staff. To collect the needed data for the proposed system, the authors developed a questionnaire to interview the staff involved in using the system. The staff consisted of both the software engineers who are responsible for the input and processing of the data and the monitors who are responsible for the resulting reports. After collecting the data, it was sent to the centralized server for analysis. The healthcare staff was very satisfied with the proposed application as it helped save time and effort. The system has been used in Pakistan in 24 districts and has proved its success in all these areas. However, the proposed system is very recent and needs more time to be generalized across the entire country.</p>
</sec>
<sec id="s3">
<label>3</label><title>Material and Methods</title>
<p>This section contains the dataset description and the methodology used to build the proposed model. To train the model, a Saudi clinical dataset was collected and cleaned. This will help in reducing the miscarriage percentage in Saudi Arabia. Several ML classifiers were used. These classifiers were selected according to the findings of the literature review, as the best classifiers were selected to experiment with their performance on our dataset. The evaluation metrics were used to compare the performance of the proposed models. <xref ref-type="fig" rid="fig-1">Fig. 1</xref>. summarizes the proposed methodology.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption><title>Methodology diagram</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_35710-fig-1.tif"/>
</fig>
<sec id="s3_1">
<label>3.1</label><title>Data Collection</title>
<p>The study was performed using retrospective data from King Fahad University Hospital, Khobar, KSA. The study was approved with institutional review board (IRB) no. UGS-2021-09-057. The dataset contained 23 clinical attributes of 981 pregnant women with normal delivery or miscarriage. All the attributes in the dataset were numeric except for the final attribute. The numbers of cases of normal delivery (529) and miscarriage (425) were almost balanced, as shown in <xref ref-type="fig" rid="fig-2">Fig. 2</xref>. The data were collected during the first trimester. The average age of the pregnant women with normal delivery was 31.5 years, whereas the average age of those with miscarriages was 32.7 years. The weights of women with normal delivery and miscarriage outcomes were similar (72 kilogram).</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption><title>Number of samples per category in the dataset</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_35710-fig-2.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label><title>Data Preprocessing</title>
<p>Data preprocessing was applied to prepare the dataset for the training of the proposed model. This included cleaning the data to remove noise and handle missing values. Initially, the dataset was normalized. Normalization is a method of scaling numeric data to a specific range [<xref ref-type="bibr" rid="ref-15">15</xref>]. Because the features of the current dataset were all numeric features, but their ranges were very wide, min&#x2013;max normalization techniques were applied to all features of the dataset so that their ranges were all between 0 and 1. The formula for min&#x2013;max normalization is represented in <xref ref-type="disp-formula" rid="eqn-1">Eq. (1)</xref>.</p>
<p><disp-formula id="eqn-1"><label>(1)</label><mml:math id="mml-eqn-1" display="block"><mml:mi>X</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>X</mml:mi><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>Subsequently, the normalization encoding was applied to the class attributes. After initial preprocessing, the correlations between the features and the attributes were assessed. The correlation of each feature with the target class was key for demonstrating the effect of a feature on the class attribute; some features played important roles in determining the class value, whereas other features had very small effects on the class value. Moreover, features could have positive or negative impacts. <xref ref-type="table" rid="table-2">Table 2</xref>. shows the attribute descriptions and the correlation between each feature and the target class.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption><title>Attribute description and correlation with the target</title>
</caption>
<table frame="hsides" >
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th>#</th>
<th>Attribute<break/>category</th>
<th>Attribute</th>
<th>Description</th>
<th>Correlation</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td><bold>Demographic</bold></td>
<td><bold>Age</bold></td>
<td>Patient&#x2019;s age</td>
<td>0.086723</td>
</tr>
<tr>
<td>2</td>
<td/>
<td><bold>Height</bold></td>
<td>Patient&#x2019;s height</td>
<td>0.218346</td>
</tr>
<tr>
<td>3</td>
<td/>
<td><bold>Weight</bold></td>
<td>Patient&#x2019;s weight</td>
<td>0.033858</td>
</tr>
<tr>
<td>4</td>
<td><bold>Hematological</bold></td>
<td><bold>White blood cells</bold></td>
<td>White blood cell count</td>
<td>&#x2212;0.176410</td>
</tr>
<tr>
<td>5</td>
<td/>
<td><bold>Red blood cells</bold></td>
<td>Red blood cell count</td>
<td>&#x2212;0.058829</td>
</tr>
<tr>
<td>6</td>
<td/>
<td><bold>Hemoglobin</bold></td>
<td>Hemoglobin count</td>
<td>0.066638</td>
</tr>
<tr>
<td>7</td>
<td/>
<td><bold>Hematocrit</bold></td>
<td>Hematocrit level</td>
<td>&#x2212;0.028201</td>
</tr>
<tr>
<td>8</td>
<td/>
<td><bold>MCV</bold></td>
<td>Mean corpuscular volume level</td>
<td>0.028968</td>
</tr>
<tr>
<td>9</td>
<td/>
<td><bold>MCH</bold></td>
<td>Mean corpuscular hemoglobin level</td>
<td>0.083064</td>
</tr>
<tr>
<td>10</td>
<td/>
<td><bold>MCHC</bold></td>
<td>Mean corpuscular hemoglobin<break/> concentration</td>
<td>0.315363</td>
</tr>
<tr>
<td>11</td>
<td/>
<td><bold>RDW</bold></td>
<td>Red cell distribution width</td>
<td>&#x2212;0.137509</td>
</tr>
<tr>
<td>12</td>
<td/>
<td><bold>MPV</bold></td>
<td>Mean platelet volume</td>
<td>&#x2212;0.200866</td>
</tr>
<tr>
<td>13</td>
<td/>
<td><bold>Platelets</bold></td>
<td>Platelet level</td>
<td>0.234775</td>
</tr>
<tr>
<td>14</td>
<td/>
<td><bold>Eosinophil&#x2014;Instrument%</bold></td>
<td>Eosinophils&#x2014;Instrument percentage</td>
<td>0.254486</td>
</tr>
<tr>
<td>15</td>
<td/>
<td><bold>Eosinophil&#x2014;Instrument Abso.</bold></td>
<td>Eosinophils&#x2014;Instrument absolute value</td>
<td>0.292272</td>
</tr>
<tr>
<td>16</td>
<td/>
<td><bold>Basophil&#x2014;Instrument%</bold></td>
<td>Basophils&#x2014;Instrument percentage</td>
<td>0.384456</td>
</tr>
<tr>
<td>17</td>
<td/>
<td><bold>Basophil&#x2014;Instrument Abso.</bold></td>
<td>Basophils&#x2014;Instrument absolute value</td>
<td>&#x2212;0.276348</td>
</tr>
<tr>
<td>18</td>
<td/>
<td><bold>Lymphocyte&#x2014;Instrument%</bold></td>
<td>Lymphocytes&#x2014;Instrument percentage</td>
<td>0.142342</td>
</tr>
<tr>
<td>19</td>
<td/>
<td><bold>Lymphocyte&#x2014;Instrument Abso.</bold></td>
<td>Lymphocyte&#x2014;Instrument absolute value</td>
<td>&#x2212;0.062244</td>
</tr>
<tr>
<td>20</td>
<td/>
<td><bold>Monocyte&#x2014;Instrument%</bold></td>
<td>Monocytes&#x2014;Instrument percentage</td>
<td>&#x2212;0.186689</td>
</tr>
<tr>
<td>21</td>
<td/>
<td><bold>Monocyte&#x2014;Instrument Abso.</bold></td>
<td>Monocytes&#x2014;Instrument absolute value</td>
<td>&#x2212;0.233695</td>
</tr>
<tr>
<td>22</td>
<td/>
<td><bold>Neutrophil granulocyte&#x2014;Instrument%</bold></td>
<td>Neutrophil granulocytes&#x2014;Instrument percentage</td>
<td>&#x2212;0.130288</td>
</tr>
<tr>
<td>23</td>
<td/>
<td><bold>Neutrophil granulocyte</bold><break/><bold>&#x2014;Instrument Abso.</bold></td>
<td>Neutrophil granulocytes&#x2014;Instrument absolute values</td>
<td>&#x2212;0.222571</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="table-2">Table 2</xref> above, most of the features were weakly correlated with the target class. The highest correlation was observed between the basophil percentage and the class value (0.384456).</p>

</sec>
<sec id="s3_3">
<label>3.3</label><title>Classification Models</title>
<p>Several classification models were applied, namely RF, KNN, GB, and DT. The section below describes the classifiers used in this study.</p>
<sec id="s3_3_1">
<label>3.3.1</label><title>K-Nearest Neighbors</title>
<p>KNN is a frequently used supervised ML algorithm; it is a nonparametric method that is used for both classification and regression problems. The algorithm determines the new observation classification by calculating the distance metric between the test instance and all the training instances, after which it selects KNN instances [<xref ref-type="bibr" rid="ref-15">15</xref>]. Various distance measure functions can be used to calculate the distance; the most widely used functions are the Euclidean distance, illustrated in <xref ref-type="disp-formula" rid="eqn-2">Eq. (2)</xref>; the Manhattan distance, represented in <xref ref-type="disp-formula" rid="eqn-3">Eq. (3)</xref>; and the Minkowski metric, shown in <xref ref-type="disp-formula" rid="eqn-4">Eq. (4)</xref>:</p>
<p><disp-formula id="eqn-2"><label>(2)</label><mml:math id="mml-eqn-2" display="block"><mml:msqrt><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt></mml:math></disp-formula></p>
<p><disp-formula id="eqn-3"><label>(3)</label><mml:math id="mml-eqn-3" display="block"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p><disp-formula id="eqn-4"><label>(4)</label><mml:math id="mml-eqn-4" display="block"><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow></mml:msup><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>q</mml:mi></mml:mfrac></mml:mrow></mml:msup></mml:math></disp-formula>where x is the test sample and y is the training sample.</p>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label><title>Decision Tree</title>
<p>DT is a supervised ML algorithm that can be used to solve regression and classification problems. It consists of a hierarchical tree structure that is similar to flow charts [<xref ref-type="bibr" rid="ref-15">15</xref>]. The most frequently used method is implementing a top-down greedy search to calculate the entropy see <xref ref-type="disp-formula" rid="eqn-5">Eq. (5)</xref> and information gain (IG) see <xref ref-type="disp-formula" rid="eqn-6">Eq. (6)</xref> for each class:</p>
<p><disp-formula id="eqn-5"><label>(5)</label><mml:math id="mml-eqn-5" display="block"><mml:mi>E</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>T</mml:mi><mml:mo>,</mml:mo><mml:mi>X</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>X</mml:mi></mml:mrow></mml:munder><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>c</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mi>E</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>c</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p><disp-formula id="eqn-6"><label>(6)</label><mml:math id="mml-eqn-6" display="block"><mml:mi>G</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>T</mml:mi><mml:mo>,</mml:mo><mml:mi>X</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="italic">E</mml:mi><mml:mi mathvariant="italic">n</mml:mi><mml:mi mathvariant="italic">t</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">p</mml:mi><mml:mi mathvariant="italic">y</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>T</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mi mathvariant="italic">E</mml:mi><mml:mi mathvariant="italic">n</mml:mi><mml:mi mathvariant="italic">t</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">p</mml:mi><mml:mi mathvariant="italic">y</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>T</mml:mi><mml:mo>,</mml:mo><mml:mi>X</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
</sec>
<sec id="s3_3_3">
<label>3.3.3</label><title>Random Forest</title>
<p>RF is a supervised ML algorithm that is applicable in classification and regression problems [<xref ref-type="bibr" rid="ref-15">15</xref>]. The technique is a tree-based ensemble that consists of multiple DTs; each tree represents random variables. The Gini index <xref ref-type="disp-formula" rid="eqn-7">Eq. (7)</xref> is used to determine the branch and spread of nodes on a DT, and <xref ref-type="disp-formula" rid="eqn-8">Eq. (8)</xref> (entropy) is used to select the branches of the nodes as it computes the probability of a certain outcome:</p>
<p><disp-formula id="eqn-7"><label>(7)</label><mml:math id="mml-eqn-7" display="block"><mml:mi>G</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></disp-formula></p>
<p><disp-formula id="eqn-8"><label>(8)</label><mml:math id="mml-eqn-8" display="block"><mml:mrow><mml:mi mathvariant="italic">E</mml:mi><mml:mi mathvariant="italic">n</mml:mi><mml:mi mathvariant="italic">t</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">p</mml:mi><mml:mi mathvariant="italic">y</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2217;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula>where p<sub>i</sub> is the probability of class i.</p>
</sec>
<sec id="s3_3_4">
<label>3.3.4</label><title>Gradient Boosting</title>
<p>The gradient boosting (GB) algorithm is an ML algorithm that uses ensembled methods, such as boosting, to build a model with high performance; it can be used for classification and regression problems. The algorithm is based on combining multiple trees with low accuracy rates to create an enhanced classifier with better performance and higher accuracy results than the original trees [<xref ref-type="bibr" rid="ref-15">15</xref>].</p>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label><title>Evaluation Metrics</title>
<p>The evaluation metrics used to evaluate the proposed model were accuracy, precision, recall, F1 score, and ROC-AUC.</p>
<p>A confusion matrix was used to measure the performance of the model by comparing the predicted values with the true values [<xref ref-type="bibr" rid="ref-15">15</xref>,<xref ref-type="bibr" rid="ref-16">16</xref>]; the abbreviations TP, TN, FP, and FN indicate true positive, true negative, false positive, and false negative, respectively.</p>
<p>The accuracy <xref ref-type="disp-formula" rid="eqn-9">Eq. (9)</xref> represents the percentage of the correctly classified instances among all the instances in the testing set:</p>
<p><disp-formula id="eqn-9"><label>(9)</label><mml:math id="mml-eqn-9" display="block"><mml:mrow><mml:mi mathvariant="italic">A</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">u</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">a</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">y</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>The precision <xref ref-type="disp-formula" rid="eqn-10">Eq. (10)</xref> represents the percentage of the correctly classified instances in the positive class among all the positive classifications:</p>
<p><disp-formula id="eqn-10"><label>(10)</label><mml:math id="mml-eqn-10" display="block"><mml:mrow><mml:mi mathvariant="italic">P</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">e</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">s</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">n</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>The recall <xref ref-type="disp-formula" rid="eqn-11">Eq. (11)</xref> (also known as sensitivity) represents the percentage of the positive instances that were correctly classified among all the true-positive instances:</p>
<p><disp-formula id="eqn-11"><label>(11)</label><mml:math id="mml-eqn-11" display="block"><mml:mrow><mml:mi mathvariant="italic">R</mml:mi><mml:mi mathvariant="italic">e</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">a</mml:mi><mml:mi mathvariant="italic">l</mml:mi><mml:mi mathvariant="italic">l</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>The F score <xref ref-type="disp-formula" rid="eqn-12">Eq. (12)</xref> represents the average of the correctly classified instances in the positive class (precision) and the positive instances that were correctly classified (recall). It is used to evaluate the balance of a model&#x2019;s predictions between the two classes:</p>
<p><disp-formula id="eqn-12"><label>(12)</label><mml:math id="mml-eqn-12" display="block"><mml:mi>F</mml:mi><mml:mtext>&#xA0;</mml:mtext><mml:mrow><mml:mi mathvariant="italic">s</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">e</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mrow><mml:mi mathvariant="italic">P</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">e</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">s</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">n</mml:mi></mml:mrow><mml:mo>&#x00D7;</mml:mo><mml:mrow><mml:mi mathvariant="italic">R</mml:mi><mml:mi mathvariant="italic">e</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">a</mml:mi><mml:mi mathvariant="italic">l</mml:mi><mml:mi mathvariant="italic">l</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="italic">P</mml:mi><mml:mi mathvariant="italic">r</mml:mi><mml:mi mathvariant="italic">e</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">s</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">n</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="italic">R</mml:mi><mml:mi mathvariant="italic">e</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">a</mml:mi><mml:mi mathvariant="italic">l</mml:mi><mml:mi mathvariant="italic">l</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>The ROC AUC shows the performance of the model at all the classification thresholds. It plots two parameters, the true-positive rate (TPR) and the false-positive rate (FPR), at different thresholds.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label><title>Experiments and Results</title>
<p>The experiment was carried out using Python 3.6 on the Jupyter notebook platform. The dataset contained two classes with a total of 981 cases; 80% of cases in each class were used for training, and 20% of cases in each class were used for testing. The target prediction had two values: 0 (normal delivery) and 1 (miscarriage). Parameter optimization was used to select the best hyperparameters for each model to obtain optimal performance. In this study, the grid search cross-validation (CV) algorithm was used to identify the optimal parameters for each model. Grid search CV uses all the possible combinations of the available parameters to determine the optimal combination of parameters. <xref ref-type="table" rid="table-3">Table 3</xref> shows the optimal values for each model after applying grid search CV, and <xref ref-type="table" rid="table-4">Table 4</xref> presents the results of the proposed models without parameter optimization. <xref ref-type="table" rid="table-5">Table 5</xref> provides the results of the classifiers after parameter optimization.</p>
<table-wrap id="table-3">
<label>Table 3</label>
<caption><title>Optimal parameters of the classifiers after grid search optimization</title>
</caption>
<table frame="hsides" >
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th>Classifier</th>
<th>Parameter</th>
<th>Optimal value</th>
</tr>
</thead>
<tbody>
<tr>
<td rowspan="3">RF</td>
<td>Criterion</td>
<td>Entropy</td>
</tr>
<tr>
<td>Max depth</td>
<td>10</td>
</tr>
<tr>
<td>Number estimators</td>
<td>100</td>
</tr>
<tr>
<td>KNN</td>
<td>K-value</td>
<td>3</td>
</tr>
<tr>
<td rowspan="4">GB</td>
<td>Loss</td>
<td>Exponential</td>
</tr>
<tr>
<td>Max depth</td>
<td>10</td>
</tr>
<tr>
<td>Max features</td>
<td>Log<sub>2</sub></td>
</tr>
<tr>
<td>Number estimators</td>
<td>15</td>
</tr>
<tr>
<td rowspan="2">DT</td>
<td>Criterion</td>
<td>Gini</td>
</tr>
<tr>
<td>Max depth</td>
<td>15</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-4">
<label>Table 4</label>
<caption><title>Classifier performance before parameter optimization</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th>Classifier</th>
<th>Precision</th>
<th>Recall</th>
<th>F1 Score</th>
<th>Accuracy</th>
<th>ROC AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td>RF</td>
<td>92%</td>
<td>91%</td>
<td>91%</td>
<td>91.37%</td>
<td>98%</td>
</tr>
<tr>
<td>KNN</td>
<td>81%</td>
<td>81%</td>
<td>81%</td>
<td>80.71%</td>
<td>89%</td>
</tr>
<tr>
<td>GB</td>
<td><bold>93%</bold></td>
<td><bold>93</bold>%</td>
<td><bold>93</bold>%</td>
<td><bold>92.89</bold>%</td>
<td><bold>98</bold>%</td>
</tr>
<tr>
<td>DT</td>
<td>89%</td>
<td>89%</td>
<td>89%</td>
<td>88.83%</td>
<td>88%</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-5">
<label>Table 5</label>
<caption><title>Classifier performance after parameter optimization</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th>Classifier</th>
<th>Precision</th>
<th>Recall</th>
<th>F1 Score</th>
<th>Accuracy</th>
<th>ROC AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td>RF</td>
<td>92%</td>
<td>92%</td>
<td>92%</td>
<td>91.87%</td>
<td>97%</td>
</tr>
<tr>
<td>KNN</td>
<td>82%</td>
<td>82%</td>
<td>82%</td>
<td>81.73%</td>
<td>87%</td>
</tr>
<tr>
<td>GB</td>
<td><bold>94</bold>%</td>
<td><bold>93</bold>%</td>
<td><bold>93</bold>%</td>
<td><bold>93.40%</bold></td>
<td><bold>97</bold>%</td>
</tr>
<tr>
<td>DT</td>
<td>89%</td>
<td>89%</td>
<td>89%</td>
<td>88.83%</td>
<td>88%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="table-4">Table 4</xref> above, the GB classifier resulted in the best performance, with an accuracy of 92.89%, while the KNN classifier resulted in the lowest performance. After applying grid search CV, the performance of most models improved, as shown in <xref ref-type="table" rid="table-5">Table 5</xref> and <xref ref-type="fig" rid="fig-3">Fig. 3</xref>.</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption><title>Optimized models&#x0027; performance</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_35710-fig-3.tif"/>
</fig>
<p><xref ref-type="table" rid="table-5">Table 5</xref> shows that the performance improved in all models except for the DT model, in which the performance was the same. The GB model resulted in the best performance, with an accuracy of 93.40% and a precision of 94%. Although all the evaluation metrics improved after the optimization, the ROC AUC decreased in all the models; it reached 98% before optimization and 97% after optimization in both the RF and GB models. These experiments demonstrate that the ensemble models outperformed the single classifiers; the RF and GB models resulted in the best performance among all the models, while the KNN model resulted in the lowest performance, with an accuracy level of 82%. Even though the DT model resulted in an accuracy level of 89%, which is higher than that of KNN, the two models resulted in similar ROC-AUC values of 88% and 87%, respectively. <xref ref-type="fig" rid="fig-4">Fig. 4</xref> and <xref ref-type="fig" rid="fig-5">Fig. 5</xref> show the confusion matrices for the RF and GB models.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption><title>Confusion matrix for RF</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_35710-fig-4.tif"/>
</fig><fig id="fig-5">
<label>Figure 5</label>
<caption><title>Confusion matrix for GB</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_35710-fig-5.tif"/>
</fig>
<p>The confusion matrix shows that RF misclassified 16 instances, whereas GB misclassified 13 instances; this explains the difference in accuracy between the two models. For a more detailed comparison, <xref ref-type="fig" rid="fig-6">Figs. 6</xref> and <xref ref-type="fig" rid="fig-7">7</xref> show the ROC AUC of the RF and GB models.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption><title>ROC AUC for RF</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_35710-fig-6.tif"/>
</fig><fig id="fig-7">
<label>Figure 7</label>
<caption><title>ROC AUC for GB</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_35710-fig-7.tif"/>
</fig>
<p>The proposed model resulted in high performance on the new Saudi clinical dataset. Although the data is new and was collected from a hospital, the quality of the data was high, and the data was balanced. Moreover, the model required a small number of features to predict miscarriage. This will save the time and effort of patients and doctors. The proposed model is fully automated and can be generalized and used in other Saudi hospitals to reduce the percentage of miscarriages in Saudi Arabia.</p>
</sec>
<sec id="s5">
<label>5</label><title>Conclusion</title>
<p>The risk of pregnancy loss is constantly increasing, and many pregnant women are facing a serious risk. Therefore, it is necessary to determine the predictors of these complications, as they could help obstetricians estimate the risk level at the early stages of pregnancy and lower the risk of miscarriage. In this study, several ML algorithms, namely RF, DT, KNN, and GB, were used to predict miscarriage at early stages to improve the healthcare sector and reduce the risk for pregnant women. The study used a real dataset, which contained the data of 984 patients at King Fahad University Hospital, Khobar, KSA, including 23 clinical attributes. The results showed that the GB classifier outperformed the other models with an accuracy of 93.40% and a ROC-AUC of 97%. To the best of the authors&#x2019; knowledge, this is the first study to use ML and a real dataset from KSA for the early prediction of miscarriage. Collecting the data and preparing it for use was the main limitation of this work. Although the results achieved by the current study are significant, further studies should be conducted to build on these findings. Specifically, the proposed model must be validated using a large, multicenter dataset. For future work, a larger dataset from multiple Saudi hospitals will be collected and combined to build more accurate models. Also, more advanced techniques such as DL will be used to develop enhanced models.</p>
</sec>
</body>
<back>
<ack>
<p>We would like to acknowledge King Fahd Hospital for providing the dataset needed to perform our experiments and build the proposed model.</p>
</ack>
<sec><title>Funding Statement</title>
<p>The authors received no specific funding for this study.</p>
</sec>
<sec sec-type="COI-statement"><title>Conflicts of Interest</title>
<p>The authors declare that they have no conflicts of interest to report regarding the present study.</p>
</sec>
<ref-list content-type="authoryear"><title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><given-names>S.</given-names> <surname>Prager</surname></string-name>, <string-name><given-names>E.</given-names> <surname>Micks</surname></string-name> and <string-name><given-names>V. K.</given-names> <surname>Dalton</surname></string-name></person-group>, &#x201C;<article-title>Pregnancy loss (miscarriage): Terminology, risk factors, and etiology</article-title>,&#x201D; <year>2021</year>. [Online]. Available: <ext-link ext-link-type="uri" xlink:href="https://www.medilib.ir/uptodate/show/5439">https://www.medilib.ir/uptodate/show/5439</ext-link>.</mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Aslam</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Perera</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Watts</surname></string-name>, <string-name><given-names>D.</given-names> <surname>Kaye</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Layland</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Previous pre-eclampsia, gestational diabetes and hypertension place women at high cardiovascular risk: But do we ask?</article-title>,&#x201D; <source>Heart Lung and Circulation</source>, vol. <volume>30</volume>, no. <issue>1</issue>, pp. <fpage>154</fpage>&#x2013;<lpage>157</lpage>, <year>2021</year>.</mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>C. B.</given-names> <surname>Schmidt</surname></string-name>, <string-name><given-names>I.</given-names> <surname>Voorhorst</surname></string-name>, <string-name><given-names>V. H. W.</given-names> <surname>Gaar</surname></string-name>, <string-name><given-names>A.</given-names> <surname>Keukens</surname></string-name>, <string-name><given-names>B. J. P.</given-names> <surname>Loon</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Diabetes distress is associated with adverse pregnancy outcomes in women with gestational diabetes: A prospective cohort study</article-title>,&#x201D; <source>BMC Pregnancy Childbirth</source>, vol. <volume>19</volume>, no. <issue>1</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>9</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Mora-S&#x00E1;nchez</surname></string-name>, <string-name><given-names>D.-I.</given-names> <surname>Aguilar-Salvador</surname></string-name> and <string-name><given-names>I.</given-names> <surname>Nowak</surname></string-name></person-group>, &#x201C;<article-title>Towards a gamete matching platform: Using immunogenetics and artificial intelligence to predict recurrent miscarriage</article-title>,&#x201D; <source>NPJ Digital Medicine</source>, vol. <volume>2</volume>, no. <issue>1</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>6</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>V.</given-names> <surname>Bruno</surname></string-name>, <string-name><given-names>M.</given-names> <surname>D&#x2019;Orazio</surname></string-name>, <string-name><given-names>C.</given-names> <surname>Ticconi</surname></string-name>, <string-name><given-names>P.</given-names> <surname>Abundo</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Riccio</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Machine learning (ML) based-method applied in recurrent pregnancy loss (RPL) patients diagnostic work-up: A potential innovation in common clinical practice</article-title>,&#x201D; <source>Scientific Reports</source>, vol. <volume>10</volume>, no. <issue>1</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>12</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>J. H.</given-names> <surname>Jhee</surname></string-name>, <string-name><given-names>S. H.</given-names> <surname>Lee</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Park</surname></string-name>, <string-name><given-names>S. E.</given-names> <surname>Lee</surname></string-name>, <string-name><given-names>Y. A.</given-names> <surname>Kim</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Prediction model development of late-onset preeclampsia using machine learning-based methods</article-title>,&#x201D; <source>PLoS One</source>, vol. <volume>14</volume>, no. <issue>8</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>12</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>H.</given-names> <surname>Alptekin</surname></string-name>, <string-name><given-names>T.</given-names> <surname>Acar</surname></string-name>, <string-name><given-names>H.</given-names> <surname>I&#x015F;&#x0131;k</surname></string-name> and <string-name><given-names>T.</given-names> <surname>Cengiz</surname></string-name></person-group>, &#x201C;<article-title>Ultrasound prediction of spontaneous abortions in live embryos in the first trimester</article-title>,&#x201D; <source>Electronic Journal of General Medicine</source>, vol. <volume>13</volume>, no. <issue>4</issue>, pp. <fpage>86</fpage>&#x2013;<lpage>90</lpage>, <year>2016</year>.</mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Miyagi</surname></string-name>, <string-name><given-names>T.</given-names> <surname>Habara</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Hirata</surname></string-name> and <string-name><given-names>N.</given-names> <surname>Hayashi</surname></string-name></person-group>, &#x201C;<article-title>Feasibility of artificial intelligence for predicting live birth without aneuploidy from a blastocyst image</article-title>,&#x201D; <source>Reproductive Medicine and Biology</source>, vol. <volume>18</volume>, no. <issue>2</issue>, pp. <fpage>204</fpage>&#x2013;<lpage>211</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>E.</given-names> <surname>Malacova</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Tippaya</surname></string-name>, <string-name><given-names>H. D.</given-names> <surname>Bailey</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Chai</surname></string-name>, <string-name><given-names>B. M.</given-names> <surname>Farrant</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Stillbirth risk prediction using machine learning for a large cohort of births from Western Australia, 1980-2015</article-title>,&#x201D; <source>Scientific Reports</source>, vol. <volume>10</volume>, no. <issue>1</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>8</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Koivu</surname></string-name> and <string-name><given-names>M.</given-names> <surname>Sairanen</surname></string-name></person-group>, &#x201C;<article-title>Predicting risk of stillbirth and preterm pregnancies with machine learning</article-title>,&#x201D; <source>Health Information Science and Systems</source>, vol. <volume>8</volume>, no. <issue>1</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>12</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>L.</given-names> <surname>Liu</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Jiao</surname></string-name>, <string-name><given-names>X.</given-names> <surname>Li</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Ouyang</surname></string-name> and <string-name><given-names>D.</given-names> <surname>Shi</surname></string-name></person-group>, &#x201C;<article-title>Machine learning algorithms to predict early pregnancy loss after in vitro fertilization-embryo transfer with fetal heart rate as a strong predictor</article-title>,&#x201D; <source>Computer Methods and Programs Biomedicine</source>, vol. <volume>196</volume>, no. <issue>3</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>4</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>H.</given-names> <surname>Asri</surname></string-name>, <string-name><given-names>H.</given-names> <surname>Mousannif</surname></string-name> and <string-name><given-names>H. Al</given-names> <surname>Moatassime</surname></string-name></person-group>, &#x201C;<article-title>Big data analytics in healthcare: Case study-miscarriage prediction</article-title>,&#x201D; <source>International Journal of Distributed Systems and Technologies</source>, vol. <volume>10</volume>, no. <issue>4</issue>, pp. <fpage>45</fpage>&#x2013;<lpage>58</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Mu</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Feng</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Yang</surname></string-name> and <string-name><given-names>J.</given-names> <surname>Wang</surname></string-name></person-group>, &#x201C;<article-title>Applying deep learning for adverse pregnancy outcome detection with pre-pregnancy health data</article-title>,&#x201D; <source>101 Eurotherm Seminar-Transport Phenomena in Multiphase Systems</source>, vol. <volume>189</volume>, no. <issue>1</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>6</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>U. A.</given-names> <surname>Bhatti</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Huang</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Zhang</surname></string-name> and <string-name><given-names>W.</given-names> <surname>Feng</surname></string-name></person-group>, &#x201C;<article-title>Research on the smartphone based ehealth systems for strengthing healthcare organization</article-title>,&#x201D; <source>International Conference on Smart Health</source>, vol. <volume>10219 LNCS</volume>, no. <issue>1</issue>, pp. <fpage>91</fpage>&#x2013;<lpage>101</lpage>, <year>2017</year>.</mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="book"><person-group person-group-type="author"><string-name><given-names>J.</given-names> <surname>Han</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Pei</surname></string-name> and <string-name><given-names>M.</given-names> <surname>Kamber</surname></string-name></person-group>, &#x201C;<chapter-title>Data mining: Concepts and techniques</chapter-title>.&#x201D; in: <source>The Morgan Kaufmann Series in Data Management Systems, Data Mining</source>, <edition>3</edition><sup>rd</sup> ed., vol. <volume>1</volume>. <publisher-loc>Burlington, Massachusetts, United States</publisher-loc>: <publisher-name>Science Direct</publisher-name>, pp. <fpage>83</fpage>&#x2013;<lpage>124</lpage>, <year>2012</year>.</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Tharwat</surname></string-name></person-group>, &#x201C;<article-title>Classification assessment methods</article-title>,&#x201D; <source>Emerald Insights</source>, vol. <volume>17</volume>, no. <issue>1</issue>, pp. <fpage>168</fpage>&#x2013;<lpage>192</lpage>, <year>2021</year>.</mixed-citation></ref>
</ref-list>
</back>
</article>