<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xml:lang="en" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMES</journal-id>
<journal-id journal-id-type="nlm-ta">CMES</journal-id>
<journal-id journal-id-type="publisher-id">CMES</journal-id>
<journal-title-group>
<journal-title>Computer Modeling in Engineering &#x0026; Sciences</journal-title>
</journal-title-group>
<issn pub-type="epub">1526-1506</issn>
<issn pub-type="ppub">1526-1492</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">67999</article-id>
<article-id pub-id-type="doi">10.32604/cmes.2025.067999</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Robust Skin Cancer Detection through CNN-Transformer-GRU Fusion and Generative Adversarial Network Based Data Augmentation</article-title>
<alt-title alt-title-type="left-running-head">Robust Skin Cancer Detection through CNN-Transformer-GRU Fusion and Generative Adversarial Network Based Data Augmentation</alt-title>
<alt-title alt-title-type="right-running-head">Robust Skin Cancer Detection through CNN-Transformer-GRU Fusion and Generative Adversarial Network Based Data Augmentation</alt-title>
</title-group>
<contrib-group>
<contrib id="author-1" contrib-type="author">
<name name-style="western"><surname>Varghese</surname><given-names>Alex</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western"><surname>Jain</surname><given-names>Achin</given-names></name><xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western"><surname>Inamur Rahman</surname><given-names>Mohammed</given-names></name><xref ref-type="aff" rid="aff-3">3</xref></contrib>
<contrib id="author-4" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Khan</surname><given-names>Mudassir</given-names></name><xref ref-type="aff" rid="aff-4">4</xref><email>mkmiyob@kku.edu.sa</email></contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western"><surname>Dubey</surname><given-names>Arun Kumar</given-names></name><xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-6" contrib-type="author">
<name name-style="western"><surname>Ahmad</surname><given-names>Iqrar</given-names></name><xref ref-type="aff" rid="aff-5">5</xref></contrib>
<contrib id="author-7" contrib-type="author">
<name name-style="western"><surname>Narayan</surname><given-names>Yash Prakash</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-8" contrib-type="author">
<name name-style="western"><surname>Panwar</surname><given-names>Arvind</given-names></name><xref ref-type="aff" rid="aff-6">6</xref></contrib>
<contrib id="author-9" contrib-type="author">
<name name-style="western"><surname>Choubey</surname><given-names>Anurag</given-names></name><xref ref-type="aff" rid="aff-7">7</xref></contrib>
<contrib id="author-10" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Mallik</surname><given-names>Saurav</given-names></name><xref ref-type="aff" rid="aff-8">8</xref><xref ref-type="aff" rid="aff-9">9</xref><email>smallik@arizona.edu</email><email>sauravmtech2@gmail.com</email></contrib>
<aff id="aff-1"><label>1</label><institution>Department of Computer Science and Engineering, Bharati Vidyapeeth&#x2019;s College of Engineering</institution>, <addr-line>New Delhi, 110063</addr-line>, <country>India</country></aff>
<aff id="aff-2"><label>2</label><institution>Department of Information Technology, Bharati Vidyapeeth&#x2019;s College of Engineering</institution>, <addr-line>New Delhi, 110063</addr-line>, <country>India</country></aff>
<aff id="aff-3"><label>3</label><institution>Applied College of Dhahran Aljunub, Department of Computer Science, King Khalid University</institution>, <addr-line>Aseer, Abha, 64261</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-4"><label>4</label><institution>Department of Computer Science, College of Computer Science, Applied College Tanumah, King Khalid University</institution>, <addr-line>Abha, 61413</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-5"><label>5</label><institution>Department of Computer Science, Technical and Engineering Specialties Unit, King Khalid University</institution>, <addr-line>Muhayil, 63699</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-6"><label>6</label><institution>Galgotias Multi-Disciplinary Research &#x0026; Development Cell (G-MRDC), Galgotias University</institution>, <addr-line>Greater Noida, 201308</addr-line>, <country>UP, India</country></aff>
<aff id="aff-7"><label>7</label><institution>CSE Department, Technocrats Institute of Technology</institution>, <addr-line>Bhopal, 462022</addr-line>, <country>India</country></aff>
<aff id="aff-8"><label>8</label><institution>Department of Environmental Health, Harvard T H Chan School of Public Health</institution>, <addr-line>Boston, MA 02115</addr-line>, <country>USA</country></aff>
<aff id="aff-9"><label>9</label><institution>Department of Pharmacology &#x0026; Toxicology, University of Arizona</institution>, <addr-line>Tucson, AZ 85721</addr-line>, <country>USA</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Authors: Mudassir Khan. Email: <email>mkmiyob@kku.edu.sa</email>; Saurav Mallik. Email: <email>smallik@arizona.edu</email> or <email>sauravmtech2@gmail.com</email></corresp>
</author-notes>
<pub-date date-type="collection" publication-format="electronic">
<year>2025</year>
</pub-date>
<pub-date date-type="pub" publication-format="electronic">
<day>31</day><month>08</month><year>2025</year>
</pub-date>
<volume>144</volume>
<issue>2</issue>
<fpage>1767</fpage>
<lpage>1791</lpage>
<history>
<date date-type="received">
<day>19</day>
<month>5</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>7</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 The Authors.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Published by Tech Science Press.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMES_67999.pdf"></self-uri>
<abstract>
<p>Skin cancer remains a significant global health challenge, and early detection is crucial to improving patient outcomes. This study presents a novel deep learning framework that combines Convolutional Neural Networks (CNNs), Transformers, and Gated Recurrent Units (GRUs) for robust skin cancer classification. To address data set imbalance, we employ StyleGAN3-based synthetic data augmentation alongside traditional techniques. The hybrid architecture effectively captures both local and global dependencies in dermoscopic images, while the GRU component models sequential patterns. Evaluated on the HAM10000 dataset, the proposed model achieves an accuracy of 90.61%, outperforming baseline architectures such as VGG16 and ResNet. Our system also demonstrates superior precision (91.11%), recall (95.28%), and AUC (0.97), highlighting its potential as a reliable diagnostic tool for the detection of melanoma. This work advances automated skin cancer diagnosis by addressing critical challenges related to class imbalance and limited generalization in medical imaging.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Skin cancer detection</kwd>
<kwd>deep learning</kwd>
<kwd>CNN</kwd>
<kwd>transformer</kwd>
<kwd>GRU</kwd>
<kwd>StyleGAN3</kwd>
</kwd-group>
<funding-group>
<award-group id="awg1">
<funding-source>King Khalid University</funding-source>
</award-group>
</funding-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>Skin cancer, especially melanoma, represents a major public health problem associated with high occurrence rates and potentially dire consequences without early diagnosis. Melanoma arises from melanocytes and is distinguished by its high tendency to metastasize. Only 2 to 3 percent of skin cancers are melanoma, yet it causes most skin cancer deaths. From a clinical perspective, melanoma commonly presents as an asymmetrically pigmented skin lesion and thus can mimic benign moles, making early diagnosis challenging. More than 132,000 new cases of melanoma are reported worldwide each year, according to the World Health Organization. Though highly curable if identified at an early stage, melanoma remains responsible for an excessive number of skin cancer deaths. Hence, early and accurate diagnosis has great value for better patient outcomes and decreasing mortality.</p>
<p>With the rising popularity of noninvasive imaging technology, such as dermoscopy, it is now possible to visualize skin lesions in detail, but dermoscopic images are difficult to interpret without expertise. Computer vision and deep learning-based technologies have been introduced to assist dermatologists by providing automated tools. However, despite these advancements, there are still some barriers to overcome in the development of robust computer vision models for automatic skin lesion classification. One of the biggest challenges is the strong class imbalance in dermoscopy image databases; benign lesions, such as melanocytic nevi and seborrheic keratoses, are typically overrepresented, while malignant lesions, in particular melanomas, are underrepresented. Such an imbalance produces classifiers that, while often fairly accurate, have a bias towards the majority class, potentially leading to a large number of false negatives for melanoma with serious clinical implications.</p>
<p>To address this issue, previous work has also tried to minimize the impact of class imbalance, using techniques such as cost-sensitive learning, resampling, and data augmentation [<xref ref-type="bibr" rid="ref-1">1</xref>]. Among them, data augmentation has shown great potential, in particular thanks to the application of Generative Adversarial Networks (GANs), which can generate realistic images to augment minority classes. Compared to traditional data augmentation of the images in the dataset (e.g., flipping, rotation, color jittering), GANs, namely recent sophisticated GAN models (such as StyleGAN2 and StyleGAN3), have been shown to generate dermoscopic images with a more realistic-looking visual appearance that is representative of skin lesions [<xref ref-type="bibr" rid="ref-2">2</xref>].</p>
<p>In the context of these complexities and new possibilities, this research aims to propose a reliable and interpretable deep learning framework for the classification of melanoma that sufficiently deals with unbalanced classes and improves the accuracy lower bound. Particularly, we apply a hybrid model with convolutional neural networks (CNNs), Transformers, and gated recurrent units (GRUs), concatenated with images generated from a GAN, based on StyleGAN3. Each model component covers potential aspects in lesion analysis: CNNs focus on learning localized patterns, Transformers help to highlight global context information through a self-attention mechanism, and the GRUs serve as building blocks for learning sequential relationships, e.g., how a lesion changes over time or how multiview images are input. In addition to the architecture, to obtain even more refined synthetic images, we use a modified DCGAN (Deep Convolutional Generative Adversarial Network) containing upsampling convolution blocks and linear projection of the latent vector to generate high-resolution (512 <inline-formula id="ieqn-1"><mml:math id="mml-ieqn-1"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 512) dermoscopic images that are ideal for clinical datasets. We can observe that our architecture and enhancement approach offer a noticeable increase in both classification accuracy and sensitivity, particularly concerning melanoma, thus confirming the efficacy of our solution.</p>
<p><italic>Key Contributions</italic></p>
<p>This research presents several key contributions:
<list list-type="bullet">
<list-item>
<p>Development of a hybrid CNN&#x2013;Transformer&#x2013;GRU architecture that captures local, global, and sequential features of dermoscopic images.</p></list-item>
<list-item>
<p>GAN-based data augmentation using StyleGAN3 to mitigate class imbalance.</p></list-item>
<list-item>
<p>Design of a modified DCGAN for high-resolution image synthesis with improved spatial fidelity.</p></list-item>
<list-item>
<p>Proposal of a parallel feature fusion mechanism that integrates CNN-extracted local features with global attention from Transformers.</p></list-item>
<list-item>
<p>Demonstration of superior classification performance, achieving 90.61% accuracy and 95.28% recall for melanoma detection, outperforming 10 state-of-the-art baseline models.</p></list-item>
<list-item>
<p>Clinical validation through cross-dataset generalization tests using the HAM10000 dataset.</p></list-item>
</list></p>
<p>Previous studies [<xref ref-type="bibr" rid="ref-3">3</xref>,<xref ref-type="bibr" rid="ref-4">4</xref>] have shown the effectiveness of CNNs and GANs in isolation; however, to our knowledge, none have simultaneously integrated attention mechanisms with sequential modeling for the classification of skin lesions. Our proposed model addresses this gap and demonstrates robust performance on challenging clinical benchmarks.</p>
<p>The remainder of this paper is organized as follows. <xref ref-type="sec" rid="s2">Section 2</xref> reviews the relevant literature. <xref ref-type="sec" rid="s3">Section 3</xref> details the material and methods, including data augmentation and a hybrid architecture. <xref ref-type="sec" rid="s4">Section 4</xref> presents the proposed methodology, and <xref ref-type="sec" rid="s5">Section 5</xref> presents the experimental setup and analysis of the results. <xref ref-type="sec" rid="s6">Section 6</xref> discusses the results, benchmarking, and clinical implications, and introduces a comparative evaluation using the TOPSIS ranking method. <xref ref-type="sec" rid="s7">Section 7</xref> concludes the study with insight and directions for future work.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related Work</title>
<p>Early and accurate detection of skin cancer is essential for effective treatment and improved patient outcomes. Deep learning techniques have significantly advanced medical image analysis, particularly in the classification of skin lesions. Numerous studies have explored various architectures and methodologies to improve diagnostic accuracy. Reference [<xref ref-type="bibr" rid="ref-5">5</xref>] proposed a deep convolutional neural network (CNN) architecture for the classification of multiclass skin cancer. They applied data pre-processing and augmentation techniques on the HAM10000 dataset and evaluated multiple models, including ResNet-50, VGG-16, DenseNet, MobileNet, InceptionV3, and Xception. Among these, InceptionV3 achieved the highest accuracy of 90%, demonstrating its effectiveness in distinguishing seven classes of skin lesions. Similarly, reference [<xref ref-type="bibr" rid="ref-6">6</xref>] focused on interpretability in deep learning models for the classification of skin cancer. Using pre-trained architectures such as XceptionNet, EfficientNetV2S, InceptionResNetV2, and EfficientNetV2M, combined with image enhancement to address class imbalance, they identified XceptionNet as the top performer with an accuracy of 88.72%. The study notably emphasized the role of explainable artificial intelligence (XAI) in building trust and facilitating clinical integration.</p>
<p>Recent deep learning approaches have greatly contributed to the classification of skin cancer. For example, the seven-class classification was proposed with the use of MobileNet with transfer learning in the HAM10000 dataset in [<xref ref-type="bibr" rid="ref-7">7</xref>]. Their method resulted in a categorical accuracy of 83.1%, top-2 91.36% and top-3 95.34%, motivating the use of lightweight models like MobileNet for clinical applications. In [<xref ref-type="bibr" rid="ref-8">8</xref>], the authors considered the binary classification of skin lesions, distinguishing between benign and malignant cases by transfer learning. Five pretrained models were fine-tuned with data augmentation, and the best classification result was obtained by ResNet-50, which achieved an accuracy of 93.5% and an F1-score of 0.86, highlighting the potential of transfer learning in medical images. In addition, a hybrid deep learning model based on InceptionV3 and DenseNet121 combined with a weighted sum fusion was presented to improve performance in the classification of benign and malignant lesions [<xref ref-type="bibr" rid="ref-5">5</xref>]. Recently, MonkeyNet achieved 98.91% accuracy for multiclass skin disease classification using a modified DenseNet-201 architecture and the MSID dataset [<xref ref-type="bibr" rid="ref-9">9</xref>]. A recent systematic review highlighted the effectiveness of Vision Transformers (ViTs) in improving skin cancer classification and segmentation performance on the ISIC dataset, addressing challenges like data duplication and augmentation [<xref ref-type="bibr" rid="ref-10">10</xref>]. The hybrid model obtained a detection rate of 92.27%, superior to the individual models, providing stronger assistance in detecting skin cancer. A set of these and other relevant methods, along with accuracy, datasets, and evaluation metrics, can be found in <xref ref-type="table" rid="table-1">Table 1</xref>.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Previous approaches in skin cancer detection</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th align="center">Year</th>
<th align="center">Citation</th>
<th align="center">Dataset</th>
<th align="center">Approach</th>
<th align="center">Eval metric</th>
<th align="center">Score</th>
</tr>
</thead>
<tbody>
<tr>
<td>2016</td>
<td>[<xref ref-type="bibr" rid="ref-11">11</xref>]</td>
<td>&#x2013;</td>
<td>VGG-16 and CNN</td>
<td>Accuracy</td>
<td>0.78</td>
</tr>
<tr>
<td>2017</td>
<td>[<xref ref-type="bibr" rid="ref-3">3</xref>]</td>
<td>ISIC-Dermoscopic Archive</td>
<td>Deep CNN</td>
<td>Accuracy</td>
<td>0.72</td>
</tr>
<tr>
<td>2019</td>
<td>[<xref ref-type="bibr" rid="ref-12">12</xref>]</td>
<td>ISIC 2017</td>
<td>ResNet-50-18</td>
<td>Accuracy/AUC</td>
<td>0.89/0.91</td>
</tr>
<tr>
<td>2019</td>
<td>[<xref ref-type="bibr" rid="ref-4">4</xref>]</td>
<td>ISIC 2017, ISIC 2018, PH2</td>
<td>Deep convolutional GAN</td>
<td>Accuracy/AUC</td>
<td>86.1/91.5</td>
</tr>
<tr>
<td>2020</td>
<td>[<xref ref-type="bibr" rid="ref-13">13</xref>]</td>
<td>5846 clinical images</td>
<td>FRCNN</td>
<td>Accuracy/Recall /Precision</td>
<td>0.862/83.3 /94.5</td>
</tr>
<tr>
<td>2023</td>
<td>[<xref ref-type="bibr" rid="ref-14">14</xref>]</td>
<td>HAM10000</td>
<td>Inception-Res (Inception-Re) CNN</td>
<td>Accuracy</td>
<td>0.8668</td>
</tr>
<tr>
<td>2025</td>
<td>[<xref ref-type="bibr" rid="ref-15">15</xref>]</td>
<td>ISIC</td>
<td>CNN, CapsNet, GCN</td>
<td>Accuracy</td>
<td>0.9030/0.8790 /0.8680</td>
</tr>
<tr>
<td>2021</td>
<td>[<xref ref-type="bibr" rid="ref-16">16</xref>]</td>
<td>ISIC 2017</td>
<td>NASNetMobile</td>
<td>Accuracy</td>
<td>0.82</td>
</tr>
<tr>
<td>2022</td>
<td>[<xref ref-type="bibr" rid="ref-17">17</xref>]</td>
<td>ISIC</td>
<td>CNN (Conv2D)</td>
<td>Accuracy</td>
<td>0.94</td>
</tr>
<tr>
<td>2025</td>
<td>[<xref ref-type="bibr" rid="ref-18">18</xref>]</td>
<td>HAM10000</td>
<td>DCAN-Net (CNN with modified attention)</td>
<td>Accuracy/Precision /Recall</td>
<td>0.9757/97.00 /97.57</td>
</tr>
<tr>
<td>2024</td>
<td>[<xref ref-type="bibr" rid="ref-19">19</xref>]</td>
<td>ISIC 2020</td>
<td>Hybrid CNN and Vision Transformer (ViT)</td>
<td>Accuracy/Precision /Recall</td>
<td>0.94/91.0 /90.0</td>
</tr>
<tr>
<td>2025</td>
<td>[<xref ref-type="bibr" rid="ref-20">20</xref>]</td>
<td>Kaggle (Skin Lesions - 10015 images)</td>
<td>FCDS-CNN</td>
<td>Accuracy</td>
<td>0.96</td>
</tr>
<tr>
<td>2025</td>
<td>[<xref ref-type="bibr" rid="ref-21">21</xref>]</td>
<td>HAM10000</td>
<td>Fine-tuned DenseNet-121</td>
<td>Accuracy</td>
<td>0.87</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Collectively, these studies demonstrate significant progress in skin cancer detection through the integration of advanced architectures, transfer learning, data augmentation, and model fusion. Building on these advances, our research proposes a novel deep learning framework that further improves classification accuracy and robustness. By incorporating Generative Adversarial Networks (GANs) for synthetic data augmentation, we effectively address class imbalance, resulting in more generalized and reliable predictions. In addition, our model improves interpretability, contributing to its practical applicability as a trustworthy tool for early and accurate skin cancer diagnosis.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Materials and Methods</title>
<sec id="s3_1">
<label>3.1</label>
<title>Description of Dataset</title>
<p>The dataset in this article contains dermoscopic images collected from the publicly available HAM10000 dataset [<xref ref-type="bibr" rid="ref-22">22</xref>]. Given the clinical need to distinguish malignant from benign lesions, a binary classification approach was applied, labelling lesions as either malignant (melanoma) or benign (non-melanoma). A grid of illustrative sample images from the dataset is shown in <xref ref-type="fig" rid="fig-1">Fig. 1</xref>, which illustrates the heterogeneity of lesion properties in terms of size, shape, color, and texture.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>Sample images from the dataset</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-1.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Data Preprocessing</title>
<p>A significant issue in automatic skin cancer detection is the large class imbalance in dermatological datasets, where cases of malignancy are strongly underrepresented. This imbalance tends to bias model predictions to the majority class and manifests in low sensitivity and low accuracy in detecting the minority class, which is the clinically important melanoma. To address this problem, we used synthetic data augmentation based on several variations of generative adversarial networks (GANs), with particular emphasis on the minority melanoma class. This augmentation approach is designed to increase the model&#x2019;s generalizability by letting it see a wider variety of lesion appearances as well as overcoming the lack of data.</p>
<p>In particular, this study used a variation of Deep Convolutional GAN (DCGAN), a standard DCGAN [<xref ref-type="bibr" rid="ref-23">23</xref>], and StyleGAN3 [<xref ref-type="bibr" rid="ref-24">24</xref>]. Further synthetic examples were generated based on these models by training them on melanoma class-only samples. <xref ref-type="fig" rid="fig-2">Fig. 2</xref> shows synthetic melanoma images generated by each GAN variant: (a) Standard DCGAN, (b) Modified DCGAN, and (c) StyleGAN3. <xref ref-type="table" rid="table-2">Table 2</xref> presents the various parameters that were utilized for generating synthetic samples with the GANs. Each GAN model was evaluated in terms of its capacity to produce high-quality, clinically realistic dermoscopic images, and numerical image quality and diversity measures. Before augmentation, the number of class 1 (cancerous skin lesion) and class 2 (normal skin) observations was 1954 and 8061, respectively, showing a considerable class imbalance. By GAN augmentation, the number of cancerous skin images became 6954, and that of normal skin images became 8061. This process led to a more equitable data set and permitted the model to expose itself to a larger and wider variety of manifestations of melanoma.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>Synthetic melanoma images generated by GAN variants: (<bold>a</bold>) Standard DCGAN, (<bold>b</bold>) Modified DCGAN, (<bold>c</bold>) StyleGAN3</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-2.tif"/>
</fig><table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>Comparison of GAN variants for synthetic image generation</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th align="center">Feature</th>
<th align="center">Modified DCGAN</th>
<th align="center">Simple DCGAN</th>
<th align="center">StyleGAN3</th>
</tr>
</thead>
<tbody>
<tr>
<td>Image resolution</td>
<td>512 <inline-formula id="ieqn-3"><mml:math id="mml-ieqn-3"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 512</td>
<td>512 <inline-formula id="ieqn-4"><mml:math id="mml-ieqn-4"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 512 (Default: 64 <inline-formula id="ieqn-5"><mml:math id="mml-ieqn-5"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 64, 128 <inline-formula id="ieqn-6"><mml:math id="mml-ieqn-6"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 128)</td>
<td>512 <inline-formula id="ieqn-7"><mml:math id="mml-ieqn-7"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 512 (1024 <inline-formula id="ieqn-8"><mml:math id="mml-ieqn-8"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 1024 possible)</td>
</tr>
<tr>
<td>Upsampling method</td>
<td>Bilinear/Nearest-Neighbor Upsampling &#x002B; Conv</td>
<td>Transposed convolutions</td>
<td>Alias-Free convolutions</td>
</tr>
<tr>
<td>Noise input</td>
<td>Latent vector (<italic>Z</italic>)</td>
<td>Latent vector (<italic>Z</italic>)</td>
<td>Noise injected at multiple layers</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><list list-type="bullet">
<list-item>
<p><bold>Modified DCGAN:</bold> Tailored for 512 <inline-formula id="ieqn-2"><mml:math id="mml-ieqn-2"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 512 image resolution, this variant replaces conventional transposed convolution layers with upsampling followed by convolutional blocks, effectively mitigating checkerboard artifacts. The generator begins with fully connected linear layers that project the latent vector (<italic>Z</italic>) into a high-dimensional feature space, facilitating smoother transitions in the latent representation. Although this architecture improves on the standard DCGAN, it still falls short of StyleGAN3 in generating the fine-grained dermoscopic details essential for clinical realism.</p></list-item>
<list-item>
<p><bold>Standard DCGAN:</bold> Serving as a baseline, this architecture employs transposed convolutions for upsampling. Although computationally efficient, it tends to produce lower-quality images at higher resolutions, with more pronounced artifacts, which is reflected in its elevated FID and KID scores.</p></list-item>
<list-item>
<p><bold>StyleGAN3:</bold> Selected for its state-of-the-art performance, StyleGAN3 features an alias-free generator architecture that ensures equivariance to image transformations such as translation and rotation, leading to more consistent and artifact-free image synthesis. Its design includes a mapping network that transforms the latent vector <italic>Z</italic> into an intermediate latent space <italic>W</italic>, allowing the disentanglement of style and content. Noise injection in each generator layer further enhances the image variation. These architectural advances allow StyleGAN3 to capture complex high-frequency features such as texture, color variation, and border irregularities common in dermoscopic images, thus producing more realistic and diverse synthetic lesions.</p></list-item>
</list></p>
<p>Overall, the GAN-augmented samples were integrated with the original dataset to form a balanced training set, which was subsequently normalized and resized to a fixed resolution of 512 <inline-formula id="ieqn-9"><mml:math id="mml-ieqn-9"><mml:mo>&#x00D7;</mml:mo></mml:math></inline-formula> 512 prior to input into the hybrid model. This preprocessing pipeline not only addresses class imbalance but also enhances the visual diversity of training samples, which is essential for robust melanoma detection.</p>
<p>To quantitatively assess the quality and diversity of synthetic melanoma images, we utilized three standard metrics: Fr&#x00E9;chet Inception Distance (FID), Kernel Inception Distance (KID), and Inception Score (IS). These metrics compare the distribution of synthetic images to real images and are widely used to evaluate GAN performance. The GAN objective is defined as:
<disp-formula id="eqn-1"><label>(1)</label><mml:math id="mml-eqn-1" display="block"><mml:munder><mml:mo movablelimits="true" form="prefix">min</mml:mo><mml:mi>G</mml:mi></mml:munder><mml:munder><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mi>D</mml:mi></mml:munder><mml:mi>V</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>D</mml:mi><mml:mo>,</mml:mo><mml:mi>G</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x223C;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mrow><mml:mtext>data</mml:mtext></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">[</mml:mo><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mi>D</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi><mml:mo>&#x223C;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>z</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">[</mml:mo><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>D</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>G</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo></mml:math></disp-formula>where <italic>D</italic> is the discriminator, <italic>G</italic> is the generator, and <italic>z</italic> is the noise vector.
<list list-type="bullet">
<list-item>
<p><bold> Fr&#x00E9;chet Inception Distance (FID):</bold> FID measures the similarity between the feature distributions of real and generated images using activations from a pre-trained Inception-v3 network. It assumes that these features follow a multivariate Gaussian distribution.
<disp-formula id="eqn-2"><label>(2)</label><mml:math id="mml-eqn-2" display="block"><mml:mrow><mml:mtext>FID</mml:mtext></mml:mrow><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:msup><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mrow><mml:mtext>Tr</mml:mtext></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi mathvariant="normal">&#x03A3;</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="normal">&#x03A3;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi mathvariant="normal">&#x03A3;</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="normal">&#x03A3;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></disp-formula>where <inline-formula id="ieqn-10"><mml:math id="mml-ieqn-10"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="normal">&#x03A3;</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-11"><mml:math id="mml-ieqn-11"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="normal">&#x03A3;</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:math></inline-formula> are the means and covariances of real and generated image features, respectively.</p>
<p><italic>Lower FID values indicate better quality and closer resemblance to real images</italic>.</p></list-item>
<list-item>
<p><bold>Kernel Inception Distance (KID):</bold> Unlike FID, KID uses the squared Maximum Mean Discrepancy (MMD) between the feature distributions of real and generated images, computed using a polynomial kernel.
<disp-formula id="eqn-3"><label>(3)</label><mml:math id="mml-eqn-3" display="block"><mml:mrow><mml:mtext>KID</mml:mtext></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>X</mml:mi><mml:mo>,</mml:mo><mml:mi>Y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mi mathvariant="normal">&#x2032;</mml:mi></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mrow><mml:mi mathvariant="normal">&#x2032;</mml:mi></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>2</mml:mn><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo></mml:math></disp-formula>where <inline-formula id="ieqn-12"><mml:math id="mml-ieqn-12"><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mi mathvariant="normal">&#x2032;</mml:mi></mml:mrow></mml:msup><mml:mo>&#x2208;</mml:mo><mml:mi>X</mml:mi></mml:math></inline-formula> (generated features), <inline-formula id="ieqn-13"><mml:math id="mml-ieqn-13"><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mrow><mml:mi mathvariant="normal">&#x2032;</mml:mi></mml:mrow></mml:msup><mml:mo>&#x2208;</mml:mo><mml:mi>Y</mml:mi></mml:math></inline-formula> (real features), and <italic>k</italic> is a kernel 
function, commonly a degree-3 polynomial.</p>
<p><italic>Lower KID values suggest closer alignment between real and generated image distributions</italic>.</p></list-item>
<list-item>
<p><bold>Inception Score (IS):</bold> IS evaluates the quality and diversity of generated images by using the output class probabilities from a pre-trained Inception network.
<disp-formula id="eqn-4"><label>(4)</label><mml:math id="mml-eqn-4" display="block"><mml:mrow><mml:mtext>IS</mml:mtext></mml:mrow><mml:mo>=</mml:mo><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mrow><mml:mtext>KL</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mspace width="thinmathspace"></mml:mspace><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mspace width="thinmathspace"></mml:mspace><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula>
where <inline-formula id="ieqn-14"><mml:math id="mml-ieqn-14"><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is the conditional label distribution for image <italic>x</italic>, and <inline-formula id="ieqn-15"><mml:math id="mml-ieqn-15"><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is the marginal distribution over all images.</p></list-item>
</list></p>
<p><italic>Higher IS values indicate that the model generates diverse images that are confidently classifiable</italic>.</p>
<p>Lower FID and KID values correspond to higher similarity to real images, whereas a higher IS indicates greater diversity and image quality. The comparative performance of these GANs is summarized in <xref ref-type="table" rid="table-3">Table 3</xref>.</p>
<table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>Quantitative evaluation of GAN models for synthetic image generation</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>FID <inline-formula id="ieqn-16"><mml:math id="mml-ieqn-16"><mml:mo stretchy="false">&#x2193;</mml:mo></mml:math></inline-formula></th>
<th>KID <inline-formula id="ieqn-17"><mml:math id="mml-ieqn-17"><mml:mo stretchy="false">&#x2193;</mml:mo></mml:math></inline-formula></th>
<th>Inception score <inline-formula id="ieqn-18"><mml:math id="mml-ieqn-18"><mml:mo stretchy="false">&#x2191;</mml:mo></mml:math></inline-formula></th>
</tr>
</thead>
<tbody>
<tr>
<td>StyleGAN3</td>
<td>9.8</td>
<td>0.0031</td>
<td>8.9</td>
</tr>
<tr>
<td>Modified DCGAN</td>
<td>16.7</td>
<td>0.0087</td>
<td>6.8</td>
</tr>
<tr>
<td>Standard DCGAN</td>
<td>40.5</td>
<td>0.021</td>
<td>5.2</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The visual comparison, corroborated by the quantitative metrics in <xref ref-type="table" rid="table-3">Table 3</xref>, highlights StyleGAN3&#x2019;s ability to produce lesions with finer details, greater realism, and greater diversity. The FID scores, in particular, show a marked improvement with StyleGAN3 (see <xref ref-type="fig" rid="fig-3">Fig. 3</xref>).</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>Comparison of Fr&#x00E9;chet Inception Distance (FID) scores for GAN models (Lower is better)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-3.tif"/>
</fig>
<p>Following a comprehensive comparative analysis that involved qualitative (visual inspection) and quantitative (FID, KID, and Inception Score) evaluations, StyleGAN3-generated images were selected as the most effective due to their superior fidelity, enhanced diversity, and morphological consistency with real dermoscopic patterns. The architectural innovations of StyleGAN3, particularly its alias-free design and ability to disentangle style representations, substantially contribute to its empirical performance. Furthermore, the integration of these high-quality synthetic images with conventional augmentation techniques (e.g., rotation, flipping, contrast enhancement, and Gaussian noise) significantly enriched the training dataset, thereby enhancing the model&#x2019;s capacity to generalize, particularly to rare or atypical cases of melanoma.</p>
<p>To ensure consistency in image representation, all images were resized to a <inline-formula id="ieqn-19"><mml:math id="mml-ieqn-19"><mml:mn>224</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>224</mml:mn></mml:math></inline-formula> resolution before being fed into the model. Normalization was applied by scaling the intensity values of the pixels in the <inline-formula id="ieqn-20"><mml:math id="mml-ieqn-20"><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> range, stabilizing the gradient updates, and preventing problems related to varying intensity distributions. In addition, noise reduction techniques such as median filtering were employed to suppress artifacts, including hair occlusions and uneven lighting conditions. This preprocessing pipeline ensures that the model receives high-quality input data, facilitating more reliable feature extraction.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Proposed Model: CNN with Parallel Transformer and GRU</title>
<p>Deep learning models have demonstrated remarkable success in medical image analysis, yet individual architectures often have inherent limitations. Convolutional Neural Networks (CNNs) [<xref ref-type="bibr" rid="ref-20">20</xref>], despite their ability to extract local spatial features, struggle to capture long-range dependencies. Transformers, known for their attention mechanisms, effectively capture global dependencies, but often require large-scale datasets. Similarly, Gated Recurrent Units (GRUs) [<xref ref-type="bibr" rid="ref-25">25</xref>] excel in capturing sequential dependencies but are not traditionally used for image classification tasks. To take advantage of the strengths of each architecture while mitigating their respective limitations, we propose a hybrid deep learning model that integrates CNNs, Transformers [<xref ref-type="bibr" rid="ref-26">26</xref>], and GRUs in a parallel configuration. This novel architecture ensures that local, global, and sequential dependencies are effectively captured, leading to a more comprehensive feature extraction process. The features extracted by these parallel modules CNN, Transformer, and GRU, are subsequently fused. Specifically, after the features are extracted from each of the three modules, they are concatenated into a unified feature representation. This fusion mechanism ensures that each module contributes complementary information to the final prediction. A fully connected layer processes the combined feature vector, followed by a sigmoid activation function, to output a binary classification: malignant or benign. The individual components and their roles are detailed in the following subsections. As shown in <xref ref-type="fig" rid="fig-4">Fig. 4</xref> and Algorithm 1, the proposed model consists of three parallel branches: a CNN for the extraction of spatial features, a Transformer for global attention, and a GRU to capture sequential representations. 
The outputs of all three branches are concatenated and fed into a classifier. In contrast to traditional sequential or stacked hybrid models, our design allows each module: CNN, Transformer, and GRU to learn in parallel from the same input, which encourages diverse and complementary feature learning. The GRU branch is uniquely applied to spatially reshaped image data, an unconventional use that contributes sequential reasoning to visual information. Additionally, we incorporate positional encoding in the Transformer to enhance spatial awareness, making it more suitable for medical image tasks with limited data.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>Proposed model-CNN with parallel (SA Transformer and GRU)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-4.tif"/>
</fig>
<sec id="s4_1">
<label>4.1</label>
<title>Convolutional Neural Network (CNN)</title>
<p>CNNs form the backbone of our architecture, extracting low-level and high-level spatial features from the input images [<xref ref-type="bibr" rid="ref-27">27</xref>]. The convolution operation at layer <italic>l</italic> is defined as:
<disp-formula id="eqn-5"><label>(5)</label><mml:math id="mml-eqn-5" display="block"><mml:msubsup><mml:mi>F</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mi>l</mml:mi></mml:msubsup><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>q</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>Q</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>q</mml:mi></mml:mrow><mml:mi>l</mml:mi></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:msubsup><mml:mi>F</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>+</mml:mo><mml:mi>p</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo>+</mml:mo><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mi>l</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula>where <inline-formula id="ieqn-21"><mml:math id="mml-ieqn-21"><mml:msubsup><mml:mi>F</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mi>l</mml:mi></mml:msubsup></mml:math></inline-formula> is the output feature map, <inline-formula id="ieqn-22"><mml:math id="mml-ieqn-22"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> is the ReLU activation, <italic>W</italic> represents the kernel weights and <inline-formula id="ieqn-23"><mml:math id="mml-ieqn-23"><mml:msub><mml:mi>b</mml:mi><mml:mi>l</mml:mi></mml:msub></mml:math></inline-formula> is the bias term. The convolution layers extract features such as colors, lesion borders, and texture, which are useful characteristics for distinguishing melanoma from benign lesions. 
These feature maps are then fed to later modules for further computation.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Transformer Module for Global Attention</title>
<p>Transformers have emerged as the state-of-the-art in vision tasks due to their self-attention mechanism that permits them to focus on the most important areas of an image [<xref ref-type="bibr" rid="ref-28">28</xref>]. In the proposed model, we employ the Transformer architecture to capture long-range dependencies and help understand the context of skin lesions. In contrast to CNNs that have local receptive fields, Transformers consider the entire image as a whole and thus have shown particularly good performance in lesion shape and color-variance-based diagnosis applications. The multi-head attention mechanism is computed as:
<disp-formula id="eqn-6"><label>(6)</label><mml:math id="mml-eqn-6" display="block"><mml:mrow><mml:mtext>Attention</mml:mtext></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>Q</mml:mi><mml:mo>,</mml:mo><mml:mi>K</mml:mi><mml:mo>,</mml:mo><mml:mi>V</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mtext>softmax</mml:mtext></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:mi>Q</mml:mi><mml:msup><mml:mi>K</mml:mi><mml:mi>T</mml:mi></mml:msup></mml:mrow><mml:msqrt><mml:msub><mml:mi>d</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:msqrt></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:mi>V</mml:mi></mml:math></disp-formula>where <italic>Q</italic>, <italic>K</italic>, and <italic>V</italic> are the query, key, and value matrices, and <inline-formula id="ieqn-24"><mml:math id="mml-ieqn-24"><mml:msub><mml:mi>d</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula> is the dimension of keys.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Gated Recurrent Unit (GRU) for Sequential Dependency Learning</title>
<p>While GRUs are typically used for sequential data, we apply them to the features generated by the CNN and Transformer. GRUs assist in learning temporal dependencies of the feature representations and thus help the model retain useful context from various levels of abstraction [<xref ref-type="bibr" rid="ref-29">29</xref>]. Such sequential processing contributes to capturing the subtle variations of lesion structures across different spatial areas, yielding improved classification performance. The GRU updates its hidden state <inline-formula id="ieqn-25"><mml:math id="mml-ieqn-25"><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:math></inline-formula> as:
<disp-formula id="eqn-7"><label>(7)</label><mml:math id="mml-eqn-7" display="block"><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd><mml:msub><mml:mi>z</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mtd><mml:mtd><mml:mi></mml:mi><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi>z</mml:mi></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mtext>&#x00A0;&#x00A0;&#x00A0;&#x00A0;</mml:mtext><mml:mrow><mml:mtext>(Update gate)</mml:mtext></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="eqn-8"><label>(8)</label><mml:math id="mml-eqn-8" display="block"><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd><mml:msub><mml:mi>r</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mtd><mml:mtd><mml:mi></mml:mi><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mtext>&#x00A0;&#x00A0;&#x00A0;&#x00A0;</mml:mtext><mml:mrow><mml:mtext>(Reset gate)</mml:mtext></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="eqn-9"><label>(9)</label><mml:math id="mml-eqn-9" display="block"><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">~</mml:mo></mml:mover></mml:mrow><mml:mi>t</mml:mi></mml:msub></mml:mtd><mml:mtd><mml:mi></mml:mi><mml:mo>=</mml:mo><mml:mi>tanh</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="eqn-10"><label>(10)</label><mml:math id="mml-eqn-10" display="block"><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mtd><mml:mtd><mml:mi></mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">~</mml:mo></mml:mover></mml:mrow><mml:mi>t</mml:mi></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<fig id="fig-11">
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-11.tif"/>
</fig>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Experimentation, Results, and Analysis</title>
<p>To optimize model performance, we used the Adam optimizer with a learning rate of 0.0001, ensuring efficient weight updates during backpropagation. The model was trained using Binary Cross-Entropy Loss, which is well-suited for binary classification problems, penalizing incorrect predictions based on probability distributions. The data set was split using 5-fold cross-validation, a robust evaluation strategy that ensures the model is tested across multiple partitions of the dataset, reducing bias and improving generalization. Model performance was assessed using key classification metrics, including accuracy, precision, recall, F1 score, and the area under the ROC curve (AUC-ROC). Accuracy measures overall correctness, while precision and recall quantify the trade-off between false positives and false negatives. The F1 score serves as a balanced metric, especially useful for imbalanced datasets, and the AUC-ROC evaluates the model&#x2019;s ability to distinguish between classes across different threshold settings.</p>
<p>The complete flow of our proposed skin lesion classification pipeline is visually depicted in <xref ref-type="fig" rid="fig-5">Fig. 5</xref>. The method starts from the HAM10000 dataset that has seven initial diagnostic labels: Actinic keratoses and intraepithelial carcinoma/Bowen&#x2019;s disease (AKIEC), Basal cell carcinoma (BCC), Benign keratosis-like lesions (BKL), Dermatofibroma (DF), Melanoma (MEL), Melanocytic nevi (NV), Vascular lesions (VASC). In this study, these seven classes were grouped into two categories, malignant and benign, to allow binary classification. To address the imbalanced nature of the data, and specifically the low number of malignant samples, we employed various GAN methods for augmentation, such as Standard DCGAN, Modified DCGAN, and StyleGAN3. These synthetic images, as well as classic augmentation techniques, were included in our data set to enhance the generalizability of the model. Then, different deep learning models were trained on the augmented dataset, from traditional CNNs to multistage architectures such as CNN, along with parallel Transformer and GRU. To make it robust and prevent overfitting, model training and evaluation were conducted using five-fold cross-validation. Finally, the models were assessed with several performance indicators, such as accuracy, recall, precision, F1-Score and AUC [<xref ref-type="bibr" rid="ref-30">30</xref>]. These measures enable a thorough assessment of the performance for both classification and diagnostic confidence of models.</p>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>End-to-end methodology flow of the proposed skin lesion classification system</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-5.tif"/>
</fig>
<sec id="s5_1">
<label>5.1</label>
<title>Results</title>
<p>A comprehensive comparison of all the discussed models in terms of accuracy, recall, precision, F1 Score, and AUC is shown in <xref ref-type="table" rid="table-4">Table 4</xref>. Overall, this evaluation provides a wider view of each model&#x2019;s performance in skin cancer detection than the one gained through a single-metric analysis. Each metric has its own focus: accuracy evaluates the overall percentage of correct predictions (spanning from 87.79% for VGG16 to 90.61% for the proposed model), recall evaluates the proportion of actual positive cases that are classified as such (from 81.59% for EfficientNet to 90.88% for the proposed CNN with Parallel Self-attention Transformer and GRU), precision reflects the reliability of the positive predictions (highest at 91.12% for the proposed model), and the F1-score is the harmonic mean of precision and recall (highest at 91.00% for the proposed model). The AUC measures the model&#x2019;s ability to separate the classes at various decision thresholds (0.8781 for VGG16 and 0.9680 for the proposed model). Such a balanced and multi-metric approach is highly desirable in medical diagnostics to minimize false positive and false negative errors. The proposed model, composed of a CNN with a Parallel Self-Attention Transformer and GRU, achieves the best scores across all the metrics. Robust generalization and reliability are indicated by its high precision and recall, making it suitable for clinical use. Moreover, the high AUC reveals its potential to differentiate malignant and benign skin lesions at different thresholds. The proposed model (CNN with SA Transformer &#x002B; GRU) was compared with the baselines using the paired <italic>t</italic>-test. Results demonstrate that the differences with respect to all baseline models are statistically significant (i.e., <italic>p</italic>-values &#x003C; 0.05) for classification accuracy, supporting the statistical superiority of the proposed model.</p>
<table-wrap id="table-4">
<label>Table 4</label>
<caption>
<title>Performance metrics and paired <italic>t</italic>-test <italic>p</italic>-values of proposed and legacy models</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th align="center">Model (Alternative <italic>i</italic>)</th>
<th>Accuracy (%)</th>
<th>Recall (%)</th>
<th>Precision (%)</th>
<th>F1-Score (%)</th>
<th>AUC</th>
<th><italic>p</italic>-Value</th>
</tr>
</thead>
<tbody>
<tr>
<td>VGG16</td>
<td>87.79</td>
<td>87.26</td>
<td>86.80</td>
<td>87.03</td>
<td>0.8781</td>
<td>0.000005</td>
</tr>
<tr>
<td>CNN</td>
<td>88.95</td>
<td>84.43</td>
<td>91.10</td>
<td>87.63</td>
<td>0.8864</td>
<td>0.000152</td>
</tr>
<tr>
<td>ResNet &#x002B; BiLSTM</td>
<td>89.07</td>
<td>84.97</td>
<td>96.90</td>
<td>90.53</td>
<td>0.8832</td>
<td>0.000018</td>
</tr>
<tr>
<td>AlexNet</td>
<td>89.28</td>
<td>90.87</td>
<td>89.95</td>
<td>90.41</td>
<td>0.8919</td>
<td>0.000028</td>
</tr>
<tr>
<td>MobileNet</td>
<td>89.66</td>
<td>85.84</td>
<td>95.28</td>
<td>90.30</td>
<td>0.8912</td>
<td>0.000228</td>
</tr>
<tr>
<td>CNN with Parallel (Transformer &#x002B; GRU)</td>
<td>89.85</td>
<td>87.61</td>
<td>89.73</td>
<td>88.66</td>
<td>0.9527</td>
<td>0.000869</td>
</tr>
<tr>
<td>EfficientNet</td>
<td>89.87</td>
<td>81.59</td>
<td>82.99</td>
<td>82.28</td>
<td>0.9679</td>
<td>0.009169</td>
</tr>
<tr>
<td>DenseNet and XGBoost Sequentially</td>
<td>89.96</td>
<td>85.10</td>
<td>91.11</td>
<td>88.00</td>
<td>0.9675</td>
<td>0.004754</td>
</tr>
<tr>
<td><bold>Proposed - CNN with Parallel (SA Trans &#x002B; GRU)</bold></td>
<td><bold>90.61</bold></td>
<td><bold>90.88</bold></td>
<td><bold>91.12</bold></td>
<td><bold>91.00</bold></td>
<td><bold>0.9680</bold></td>
<td>&#x2014;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The ROC curves for the proposed model and other baseline models are presented in <xref ref-type="fig" rid="fig-6">Fig. 6</xref>. We find that the AUC score of the proposed <bold>CNN with Parallel (SA Transformer &#x002B; GRU)</bold> is much higher, which indicates better class discrimination performance. This is crucial for skin cancer detection, where avoiding false negatives (missing a cancer) is vital. The ROC curve of the proposed model consistently achieves superiority over those of other conventional models like VGG16, MobileNet, and ResNet &#x002B; BiLSTM (Bidirectional Long Short-Term Memory Network). It has a high true positive rate even with a low false positive rate, indicating its reliability across different thresholds. This generalizability contributes to the robustness of the model when applied in actual clinical practice, where misclassifications could lead to life-changing decisions.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>ROC curves for proposed and baseline models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-6.tif"/>
</fig>
<p>The precision-recall curve is another important evaluation metric, especially in cases where the data set is imbalanced, which is common in medical diagnostics. <xref ref-type="fig" rid="fig-7">Fig. 7</xref> presents the precision-recall comparison graph for the proposed model without data augmentation, achieving a precision-recall score of 0.89. <xref ref-type="fig" rid="fig-8">Fig. 8</xref> shows the comparison for the proposed model with the GAN-based data augmentation, achieving a significant improvement with a precision-recall score of 0.96. The precision-recall curves demonstrate the trade-off between precision (the proportion of true positive predictions out of all positive predictions) and recall (the proportion of true positive predictions out of all actual positive cases). With data augmentation, the model maintains high precision while achieving significantly better recall. This improvement is crucial in minimizing false positives and false negatives, ensuring that the model provides accurate diagnoses without missing any potentially malignant skin lesions. The high precision and recall values with augmentation indicate that the proposed model is well-suited for early and reliable skin cancer detection.</p>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>Precision-recall comparison (Without augmentation)-Score: 0.89</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-7.tif"/>
</fig><fig id="fig-8">
<label>Figure 8</label>
<caption>
<title>Precision-recall comparison (With augmentation)-Score: 0.96</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-8.tif"/>
</fig>
</sec>
<sec id="s5_2">
<label>5.2</label>
<title>Impact of Data Augmentation on Model Performance</title>
<p>To further quantify the effectiveness of GAN-based data augmentation, we compared the performance of the proposed model on the same dataset with and without augmentation. <xref ref-type="table" rid="table-5">Table 5</xref> summarizes the key metrics for both scenarios. Without data augmentation, the model achieved an accuracy of 81.88%, AUC of 0.8320, precision of 0.796, and recall of 0.819 (see <xref ref-type="fig" rid="fig-7">Fig. 7</xref>). In contrast, with GAN-based augmentation, the model&#x2019;s performance improved substantially, achieving 90.61% accuracy, 0.9680 AUC, 91.12% precision, and 90.88% recall. This demonstrates that data augmentation not only enhances the model&#x2019;s ability to generalize to unseen data but also significantly boosts its sensitivity and overall diagnostic reliability. These findings are consistent with previous research, showing that data augmentation is a critical strategy for improving model robustness, especially in medical imaging tasks with limited or imbalanced datasets [<xref ref-type="bibr" rid="ref-1">1</xref>,<xref ref-type="bibr" rid="ref-3">3</xref>,<xref ref-type="bibr" rid="ref-4">4</xref>].</p>
<table-wrap id="table-5">
<label>Table 5</label>
<caption>
<title>Comparison of model performance with and without GAN-based data augmentation</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Setting</th>
<th>Accuracy (%)</th>
<th>AUC</th>
<th>Precision</th>
<th>Recall</th>
</tr>
</thead>
<tbody>
<tr>
<td>Without augmentation</td>
<td>81.88</td>
<td>0.8320</td>
<td>0.796</td>
<td>0.819</td>
</tr>
<tr>
<td>With augmentation (GAN)</td>
<td>90.61</td>
<td>0.9680</td>
<td>0.9112</td>
<td>0.9088</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>These experiments suggest that the application of GAN-based data augmentation is very useful for improving the classification performance of the proposed model with imbalanced medical data. To gain more insight into the classification behavior of each model, normalized confusion matrices are shown in <xref ref-type="fig" rid="fig-9">Fig. 9</xref>, which represent the true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN). This analysis provides insight into the strengths and weaknesses of each model in the accurate detection of malignant lesions. Among the confusion matrices of these models, the proposed model showed the best results, with minimal false positives and false negatives. This is further evidence of its applicability to high-risk diagnostic applications, such as skin cancer diagnosis. Other models, such as EfficientNet and DenseNet &#x002B; XGBoost, also achieve good results, although their trade-off between sensitivity and specificity is relatively less favorable.</p>
<fig id="fig-9">
<label>Figure 9</label>
<caption>
<title>Normalized confusion matrices of evaluated models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-9a.tif"/>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-9b.tif"/>
</fig>
</sec>
<sec id="s5_3">
<label>5.3</label>
<title>Discussion</title>
<p>This research introduces a novel deep learning architecture that combines CNNs with parallel SA transformers and GRUs for enhanced feature extraction and sequence modeling in skin cancer detection. This architecture represents a significant departure from conventional pre-trained networks and is expected to demonstrate superior performance in capturing complex features and patterns associated with skin lesions. The research utilizes GAN-based augmentation to address the data imbalance problem prevalent in skin cancer datasets, specifically by generating synthetic melanoma images. This approach can improve the robustness of the model, improve its ability to generalize from limited data, and mitigate the risk of overfitting, leading to more reliable and accurate predictions. Early and accurate detection of skin cancer, particularly melanoma, is crucial for effective treatment and improved patient outcomes [<xref ref-type="bibr" rid="ref-31">31</xref>]. The proposed deep learning system aims to provide clinicians with a more objective and reliable diagnostic tool, potentially reducing the subjectivity inherent in visual evaluations of skin lesions [<xref ref-type="bibr" rid="ref-32">32</xref>]. The performance of the model was rigorously evaluated using a five-fold cross-validation. This comprehensive evaluation strategy ensures the stability and reliability of the results, providing insight into the consistency and generalizability of the model in different data splitting scenarios [<xref ref-type="bibr" rid="ref-14">14</xref>]. The evaluation further includes comparative analysis with individual model branches (CNN-only, CNN&#x002B;GRU, etc.), demonstrating the advantage of our hybrid configuration. Visual explanation techniques like Grad-CAM and metric-based comparisons validate the model&#x2019;s focus and decision reliability. 
The observed performance gains confirm the effectiveness of combining spatial, sequential, and global features for medical image classification.</p>
</sec>
<sec id="s5_4">
<label>5.4</label>
<title>Comparison with Existing Work</title>
<p>This study introduces a novel hybrid architecture, which is the combination of CNN, parallel SA Transformer, and GRU, to extract complex spatial and sequential patterns in skin lesions [<xref ref-type="bibr" rid="ref-33">33</xref>&#x2013;<xref ref-type="bibr" rid="ref-35">35</xref>]. CNN extracts hierarchical visual features, Transformer emphasizes the salient regions of an image, and GRU captures the sequential latent dependencies. This parallel processing results in a richer understanding of the lesion characteristics over simple or standalone CNN-based models. The use of GAN-based augmentation further improves the model&#x2019;s ability to deal with class imbalance, yielding a variety of synthetic melanoma images that enhance generalization and reduce overfitting [<xref ref-type="bibr" rid="ref-35">35</xref>]. In comparison with typical ML (Machine Learning) and pre-trained CNN models, such as VGG16 and AlexNet, which rely on fixed feature extractors and yield lower results, the proposed model achieves better results in key evaluation parameters such as accuracy, precision, recall, F1-score, and AUC [<xref ref-type="bibr" rid="ref-35">35</xref>]. The proposed model achieves 90.61% accuracy, while VGG16, AlexNet, and the standard CNN achieved accuracies of 87.79%, 89.28%, and 88.95%, respectively. It also performs better than a CNN&#x002B;Transformer&#x002B;GRU model (89.85%). While transfer learning approaches with AlexNet, ResNet, VGG are popular [<xref ref-type="bibr" rid="ref-36">36</xref>&#x2013;<xref ref-type="bibr" rid="ref-39">39</xref>], they typically rely on generic pre-trained features. Ensemble models also might achieve better performance, but they complicate the system without guaranteed improvement. In contrast, our proposed model learns task-specialized features with its distinctive architecture and also exploits GAN-based augmentation. 
While the findings are encouraging, more testing is required in multiple datasets and clinical scenarios to determine the generalizability and robustness of the results [<xref ref-type="bibr" rid="ref-16">16</xref>,<xref ref-type="bibr" rid="ref-34">34</xref>,<xref ref-type="bibr" rid="ref-35">35</xref>].</p>
</sec>
<sec id="s5_5">
<label>5.5</label>
<title>Comparison with State-of-the-Art Architectures</title>
<p>To contextualize the performance of our proposed model, we compared its results with some recent state-of-the-art (SOTA) deep learning architectures for skin cancer detection, as reported in the literature. <xref ref-type="table" rid="table-6">Table 6</xref> summarizes the key metrics of our model alongside leading transformer-based and hybrid models such as Swin Transformer and ConvNeXt.</p>
<table-wrap id="table-6">
<label>Table 6</label>
<caption>
<title>Comparison of the proposed model with recent SOTA models for skin cancer detection</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Accuracy (%)</th>
<th>Precision (%)</th>
<th>Recall (%)</th>
<th>F1-Score (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td><bold>CNN with Parallel (SA Trans &#x002B; GRU)</bold></td>
<td><bold>90.61</bold></td>
<td><bold>91.12</bold></td>
<td><bold>90.88</bold></td>
<td><bold>91.00</bold></td>
</tr>
<tr>
<td>Swin Transformer [<xref ref-type="bibr" rid="ref-40">40</xref>]</td>
<td>89.39</td>
<td>89.13</td>
<td>83.66</td>
<td>86.10</td>
</tr>
<tr>
<td>ConvNeXt [<xref ref-type="bibr" rid="ref-41">41</xref>]</td>
<td>90.65</td>
<td>90.41</td>
<td>84.40</td>
<td>87.71</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown, our model achieves highly competitive results, particularly in F1-score and recall, which are crucial for medical diagnostics. While ConvNeXt achieves slightly higher accuracy, our approach demonstrates a stronger balance of sensitivity and precision, supported by robust generalization due to GAN-based augmentation. Notably, our model also incorporates explainability through Grad-CAM (Gradient-weighted Class Activation Mapping) and addresses class imbalance, features not always present in SOTA benchmarks. Recent studies such as the enhanced Faster R-CNN optimized by artificial gorilla troops algorithm [<xref ref-type="bibr" rid="ref-42">42</xref>] and xCViT [<xref ref-type="bibr" rid="ref-43">43</xref>] have further advanced the field by introducing novel optimization and fusion strategies. These works highlight the ongoing evolution of deep learning methods for skin cancer detection and underscore the importance of benchmarking hybrid and explainable models. Future work will focus on direct benchmarking against these and other emerging SOTA approaches, as well as integrating multi-modal and self-supervised learning strategies.</p>
</sec>
<sec id="s5_6">
<label>5.6</label>
<title>Strengths and Limitations</title>
<p>The proposed CNN with parallel SA Transformer and GRU architecture presents a novel and effective solution for skin cancer detection by integrating spatial, global, and sequential feature extraction mechanisms [<xref ref-type="bibr" rid="ref-15">15</xref>,<xref ref-type="bibr" rid="ref-33">33</xref>,<xref ref-type="bibr" rid="ref-35">35</xref>,<xref ref-type="bibr" rid="ref-44">44</xref>]. CNN captures local visual features, while SA Transformer models global dependencies, and GRU captures sequential patterns, resulting in a rich and holistic representation of skin lesions. To mitigate data imbalance, common in medical imaging, GAN-based augmentation is used to generate synthetic samples, enhancing the diversity of the data set and improving model generalization [<xref ref-type="bibr" rid="ref-3">3</xref>,<xref ref-type="bibr" rid="ref-12">12</xref>]. This multicomponent architecture increases robustness by exposing the model to a wide range of lesion variations during training, which is essential for real-world diagnostic accuracy [<xref ref-type="bibr" rid="ref-33">33</xref>,<xref ref-type="bibr" rid="ref-34">34</xref>]. The model achieves superior performance in accuracy, precision, recall, and AUC compared to baseline models such as EfficientNet, MobileNet, VGG16, AlexNet, and DenseNet with XGBoost, as confirmed by cross-validation [<xref ref-type="bibr" rid="ref-12">12</xref>,<xref ref-type="bibr" rid="ref-45">45</xref>]. The use of dermoscopic images further improves clinical relevance by highlighting fine skin structures, supporting its utility to assist dermatologists with accurate and timely diagnoses [<xref ref-type="bibr" rid="ref-3">3</xref>,<xref ref-type="bibr" rid="ref-12">12</xref>,<xref ref-type="bibr" rid="ref-14">14</xref>]. Despite its strengths, the model has limitations. 
The dependence on GAN-generated data introduces dataset-induced biases, as synthetic samples may not fully reflect the diversity of real-world lesion conditions [<xref ref-type="bibr" rid="ref-4">4</xref>,<xref ref-type="bibr" rid="ref-15">15</xref>,<xref ref-type="bibr" rid="ref-33">33</xref>,<xref ref-type="bibr" rid="ref-35">35</xref>]. This model has only been evaluated on public datasets and has not yet been validated with real-world clinical data, which may affect its generalizability in actual clinical environments. Furthermore, the computational demands of the architecture could limit its deployment in low-resource environments. In addition, the interpretability of the model remains limited, making its decision-making process less transparent for clinical use [<xref ref-type="bibr" rid="ref-35">35</xref>,<xref ref-type="bibr" rid="ref-44">44</xref>]. Although StyleGAN3-based synthetic data augmentation improves class balance, synthetic images may not fully capture the diversity of real-world skin lesions, potentially limiting the model&#x2019;s generalization in complex clinical settings. Future extensions should focus on evaluating the model across diverse skin tones and demographics, incorporating explainable AI (XAI) techniques, and optimizing efficiency to support greater adoption.</p>
</sec>
<sec id="s5_7">
<label>5.7</label>
<title>Model Explainability with Grad-CAM</title>
<p>We also used the Grad-CAM approach to visualize the most discriminative regions in the input images responsible for the model&#x2019;s decisions to improve the interpretation of our model predictions. Since Grad-CAM is intended for CNN and does not directly apply to RNN, we restricted the explanation to the CNN part of the model. The examples of Grad-CAM heatmaps overlaid over sampled images from the test set are shown in <xref ref-type="fig" rid="fig-10">Fig. 10</xref>. The highlighted areas are important parts of the image where the model learned to classify, by providing insight into how the model is making decisions. These visualizations show that the model attends to semantically sensible locations, which enhances our trust in its predictions and supports model interpretability.</p>
<fig id="fig-10">
<label>Figure 10</label>
<caption>
<title>Sample Grad-CAM visualizations showing the regions of three input images that most influenced the model&#x2019;s predictions. The heatmaps highlight areas of high importance for the model&#x2019;s decision-making process</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMES_67999-fig-10.tif"/>
</fig>
</sec>
<sec id="s5_8">
<label>5.8</label>
<title>Ablation Study</title>
<p>To evaluate the individual contribution of each architectural component, an ablation study was performed on four model variants: CNN, CNN_Transformer, CNN_GRU, and the full hybrid CNN with Parallel (Self-Attention Transformer &#x002B; GRU). The results with key evaluation parameters are summarized in <xref ref-type="table" rid="table-7">Table 7</xref>. The results show that the proposed hybrid model (CNN with Parallel (SA Trans &#x002B; GRU)) provides the best performance, with an accuracy of 90.61%, precision of 91.12%, recall of 90.88%, F1-score of 91.00%, and AUC of 0.968. Although the intermediate variants CNN_Transformer and CNN_GRU do not individually outperform the baseline CNN in accuracy, the joint use of both modules yields the best overall results. This confirms that the combination of local feature extraction (CNN), global attention (Transformer), and sequential modeling (GRU) leads to more robust and reliable skin cancer classification.</p>
<table-wrap id="table-7">
<label>Table 7</label>
<caption>
<title>Ablation study comparing the baseline CNN, CNN_Transformer, CNN_GRU, and the proposed hybrid model (CNN with Parallel Self-Attention Transformer and GRU)</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Accuracy</th>
<th>Precision</th>
<th>Recall</th>
<th>F1</th>
<th>AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td>CNN</td>
<td>88.95</td>
<td>91.10</td>
<td>84.43</td>
<td>87.63</td>
<td>0.8864</td>
</tr>
<tr>
<td>CNN_Transformer</td>
<td>87.61</td>
<td>87.99</td>
<td>87.61</td>
<td>87.49</td>
<td>0.8764</td>
</tr>
<tr>
<td>CNN_GRU</td>
<td>88.81</td>
<td>89.23</td>
<td>88.81</td>
<td>88.71</td>
<td>0.8801</td>
</tr>
<tr>
<td>CNN with Parallel (SA Trans &#x002B; GRU)</td>
<td>90.61</td>
<td>91.12</td>
<td>90.88</td>
<td>91.00</td>
<td>0.9680</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s6">
<label>6</label>
<title>Comparative Analysis Using TOPSIS</title>
<p>To perform a rigorous and objective evaluation of the proposed model against baseline and ensemble architectures, we utilize the <bold>Technique for Order of Preference by Similarity to Ideal Solution (TOPSIS)</bold>. TOPSIS is a well-established multicriteria decision-making (MCDM) method that ranks alternatives based on their geometric distances from an ideal best and an ideal worst solution [<xref ref-type="bibr" rid="ref-46">46</xref>]. This approach is particularly useful when evaluating models using multiple conflicting performance metrics such as accuracy, recall, precision, and AUC. By applying TOPSIS, we can determine which model offers the best overall trade-off across all evaluation criteria.</p>
<sec id="s6_1">
<label>6.1</label>
<title>TOPSIS Methodology</title>
<p>TOPSIS works by identifying solutions that are simultaneously closest to the ideal solution (representing the best performance in all metrics) and farthest from the negative ideal solution (representing the worst). The steps involved are the following.
<list list-type="simple">
<list-item><label>1.</label><p><bold>Construct the Decision Matrix:</bold> A matrix <inline-formula id="ieqn-42"><mml:math id="mml-ieqn-42"><mml:mi>X</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, where <italic>m</italic> is the number of models and <inline-formula id="ieqn-43"><mml:math id="mml-ieqn-43"><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:math></inline-formula> is the number of criteria (Accuracy, Recall, Precision, AUC).</p></list-item>
<list-item><label>2.</label><p><bold>Normalize the Decision Matrix:</bold> To eliminate the effect of different scales across metrics, we normalize the matrix using:
<disp-formula id="ueqn-11"><mml:math id="mml-ueqn-11" display="block"><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msqrt><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:msqrt></mml:mfrac><mml:mtext>&#x00A0;&#x00A0;&#x00A0;&#x00A0;</mml:mtext><mml:mi mathvariant="normal">&#x2200;</mml:mi><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:math></disp-formula></p></list-item>
<list-item><label>3.</label><p><bold>Weight the Normalized Matrix:</bold> Each criterion is assigned an equal weight (<inline-formula id="ieqn-44"><mml:math id="mml-ieqn-44"><mml:msub><mml:mi>w</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>0.25</mml:mn></mml:math></inline-formula>), resulting in the weighted normalized matrix:
<disp-formula id="ueqn-12"><mml:math id="mml-ueqn-12" display="block"><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula></p></list-item>
<list-item><label>4.</label><p><bold>Determine Ideal and Negative-Ideal Solutions:</bold>
<disp-formula id="ueqn-13"><mml:math id="mml-ueqn-13" display="block"><mml:msup><mml:mi>A</mml:mi><mml:mo>+</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:munder><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mo>,</mml:mo><mml:mtext>&#x00A0;&#x00A0;&#x00A0;&#x00A0;</mml:mtext><mml:msup><mml:mi>A</mml:mi><mml:mo>&#x2212;</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:munder><mml:mo movablelimits="true" form="prefix">min</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></disp-formula></p></list-item>
<list-item><label>5.</label><p><bold>Compute Separation Measures:</bold> The Euclidean distance of each model from the ideal and negative-ideal solutions:
<disp-formula id="ueqn-14"><mml:math id="mml-ueqn-14" display="block"><mml:msubsup><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>=</mml:mo><mml:msqrt><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>j</mml:mi></mml:munder><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>v</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:msqrt><mml:mo>,</mml:mo><mml:mtext>&#x00A0;&#x00A0;&#x00A0;&#x00A0;</mml:mtext><mml:msubsup><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo></mml:msubsup><mml:mo>=</mml:mo><mml:msqrt><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>j</mml:mi></mml:munder><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>v</mml:mi><mml:mi>j</mml:mi><mml:mo>&#x2212;</mml:mo></mml:msubsup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:msqrt></mml:math></disp-formula></p></list-item>
<list-item><label>6.</label><p><bold>Compute Closeness Coefficient:</bold>
<disp-formula id="ueqn-15"><mml:math id="mml-ueqn-15" display="block"><mml:msubsup><mml:mi>C</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup><mml:mo>=</mml:mo><mml:mfrac><mml:msubsup><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo></mml:msubsup><mml:mrow><mml:msubsup><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo></mml:msubsup></mml:mrow></mml:mfrac></mml:math></disp-formula></p></list-item>
<list-item><label>7.</label><p><bold>Rank the Alternatives:</bold> Models are ranked based on descending values of <inline-formula id="ieqn-45"><mml:math id="mml-ieqn-45"><mml:msubsup><mml:mi>C</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup></mml:math></inline-formula>. The higher the value, the closer the model is to the ideal solution.</p></list-item>
</list></p>
</sec>
<sec id="s6_2">
<label>6.2</label>
<title>Normalized Decision Matrix</title>
<p>The following table presents the normalized and weighted decision matrix derived from the experimental results shown in <xref ref-type="table" rid="table-8">Table 8</xref>:</p>
<table-wrap id="table-8">
<label>Table 8</label>
<caption>
<title>Weighted normalized decision matrix for TOPSIS analysis</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Accuracy</th>
<th>Recall</th>
<th>Precision</th>
<th>AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td>VGG16</td>
<td>0.000000</td>
<td>0.152583</td>
<td>0.068476</td>
<td>0.000000</td>
</tr>
<tr>
<td>CNN</td>
<td>0.102837</td>
<td>0.076426</td>
<td>0.145758</td>
<td>0.023081</td>
</tr>
<tr>
<td>ResNet &#x002B; BiLSTM</td>
<td>0.113475</td>
<td>0.090958</td>
<td>0.250000</td>
<td>0.014182</td>
</tr>
<tr>
<td>AlexNet</td>
<td>0.132092</td>
<td>0.249731</td>
<td>0.125090</td>
<td>0.038376</td>
</tr>
<tr>
<td>MobileNet</td>
<td>0.165780</td>
<td>0.114370</td>
<td>0.220884</td>
<td>0.036429</td>
</tr>
<tr>
<td>CNN with Parallel (Transformer &#x002B; GRU)</td>
<td>0.182624</td>
<td>0.162002</td>
<td>0.121136</td>
<td>0.207453</td>
</tr>
<tr>
<td>EfficientNet</td>
<td>0.184397</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.249722</td>
</tr>
<tr>
<td>DenseNet and XGBoost Sequentially</td>
<td>0.192376</td>
<td>0.094456</td>
<td>0.145938</td>
<td>0.248610</td>
</tr>
<tr>
<td><bold>CNN with Parallel (SA Trans &#x002B; GRU)</bold></td>
<td><bold>0.250000</bold></td>
<td><bold>0.250000</bold></td>
<td><bold>0.146118</bold></td>
<td><bold>0.250000</bold></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s6_3">
<label>6.3</label>
<title>TOPSIS Results and Discussion</title>
<p>Using the above weighted normalized data, we calculate the closeness coefficient <inline-formula id="ieqn-46"><mml:math id="mml-ieqn-46"><mml:msubsup><mml:mi>C</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup></mml:math></inline-formula> for each model. The resulting rankings are presented in the <xref ref-type="table" rid="table-9">Table 9</xref>:</p>
<table-wrap id="table-9">
<label>Table 9</label>
<caption>
<title>TOPSIS results: Closeness coefficients and rankings</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Model</th>
<th>Closeness coefficient (<inline-formula id="ieqn-47"><mml:math id="mml-ieqn-47"><mml:msubsup><mml:mi mathvariant="bold-italic">C</mml:mi><mml:mi mathvariant="bold-italic">i</mml:mi><mml:mo mathvariant="bold">&#x2217;</mml:mo></mml:msubsup></mml:math></inline-formula>)</th>
<th>Rank</th>
</tr>
</thead>
<tbody>
<tr>
<td><bold>CNN with Parallel (SA Trans &#x002B; GRU)</bold></td>
<td><bold>0.8148</bold></td>
<td><bold>1</bold></td>
</tr>
<tr>
<td>CNN with Parallel (Transformer &#x002B; GRU)</td>
<td>0.6616</td>
<td>2</td>
</tr>
<tr>
<td>DenseNet and XGBoost Sequentially</td>
<td>0.6472</td>
<td>3</td>
</tr>
<tr>
<td>AlexNet</td>
<td>0.5332</td>
<td>4</td>
</tr>
<tr>
<td>MobileNet</td>
<td>0.5289</td>
<td>5</td>
</tr>
<tr>
<td>ResNet &#x002B; BiLSTM</td>
<td>0.4786</td>
<td>6</td>
</tr>
<tr>
<td>EfficientNet</td>
<td>0.4633</td>
<td>7</td>
</tr>
<tr>
<td>CNN</td>
<td>0.3665</td>
<td>8</td>
</tr>
<tr>
<td>VGG16</td>
<td>0.2901</td>
<td>9</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The model <bold>CNN with Parallel (SA Transformer &#x002B; GRU)</bold> ranks first with the highest closeness coefficient of 0.8148. This indicates that it offers the best trade-off in accuracy, recall, precision, and AUC in the evaluated setup. Therefore, the proposed model demonstrates robust overall performance for skin cancer classification within the TOPSIS framework.</p>
</sec>
</sec>
<sec id="s7">
<label>7</label>
<title>Conclusion and Future Direction</title>
<p>The field of skin cancer detection is evolving at a rapid pace, and deep learning models have demonstrated great potential in image-based classification activities. However, despite these advances, the available models are often limited by data availability, suffer from overfitting, and sometimes struggle with generalization issues. The proposed hybrid model that combines CNN, the self-attention transformer, and GRU addresses these shortcomings as it helps to model local and global spatial-temporal dependencies simultaneously in dermoscopic images. The use of a GAN-based data augmentation strategy solves the problem of data imbalance and improves the robustness of the model. Unlike traditional CNN approaches that focus on binary classification and are prone to overfitting, the proposed architecture demonstrates strong performance across multiple evaluation metrics, achieving an accuracy of 90.61%, a recall of 90.88%, a precision of 91.12%, an F1-score of 91%, and an AUC of 0.968. The combination of feature extraction, sequence learning, and the use of attention mechanisms, all within a coherent framework, plays a key role in the efficiency of the model. Multicriteria decision analysis via the TOPSIS technique further confirms the superiority of the model, as the proposed model has the highest closeness coefficient, reflecting the best overall trade-off across the four performance measures (accuracy, recall, precision, and AUC). Future work can include clinical validation on more varied test sets, integration with other patient data sources, development of explainable AI methods to improve interpretability, and optimization for deployment on mobile or embedded devices.</p>
<p>While the proposed model demonstrates strong performance on the HAM10000 dataset, its domain generalizability remains to be evaluated. In future work, we plan to assess the robustness of the model by testing it on additional publicly available datasets, such as ISIC 2018. This will help validate the consistency and generalizability of our approach across different data distributions and acquisition settings. Incorporating diverse datasets will further enhance the model&#x2019;s reliability and clinical applicability. Expanding this approach to other dermatological conditions may also improve its practical utility in healthcare settings.</p>
</sec>
</body>
<back>
<ack>
<p>The authors extend their appreciation to the Deanship of Research and Graduate Studies at King Khalid University for funding this work through Small Research Project under grant number RGP1/108/46.</p>
</ack>
<sec>
<title>Funding Statement</title>
<p>The research is funded by King Khalid University, Saudi Arabia.</p>
</sec>
<sec>
<title>Author Contributions</title>
<p>Alex Varghese, Achin Jain, Mohammed Inamur Rahman, Mudassir Khan, Arun Kumar Dubey, Iqrar Ahmed, and Yash Prakash Narayan participated in designing the methodology, developing the concept, implementing the code, performing the experiments, and drafting the manuscript. Arvind Panwar, Anurag Choubey, and Saurav Mallik validated the approach, supervised the research process, and contributed to the review and editing of the manuscript. All authors reviewed the results and approved the final version of the manuscript.</p>
</sec>
<sec sec-type="data-availability">
<title>Availability of Data and Materials</title>
<p>The data set utilized in this study consists of dermoscopic images sourced from publicly available repositories, primarily the HAM10000 dataset [<xref ref-type="bibr" rid="ref-22">22</xref>], <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000">https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000</ext-link> (accessed on 30 July 2025).</p>
</sec>
<sec>
<title>Ethics Approval</title>
<p>Not applicable.</p>
</sec>
<sec>
<title>Informed Consent</title>
<p>Not applicable.</p>
</sec>
<sec sec-type="COI-statement">
<title>Conflicts of Interest</title>
<p>The authors declare no conflicts of interest to report regarding the present study.</p>
</sec>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Henrikson</surname> <given-names>NB</given-names></string-name>, <string-name><surname>Ivlev</surname> <given-names>I</given-names></string-name>, <string-name><surname>Blasi</surname> <given-names>PR</given-names></string-name>, <string-name><surname>Nguyen</surname> <given-names>MB</given-names></string-name>, <string-name><surname>Senger</surname> <given-names>CA</given-names></string-name>, <string-name><surname>Perdue</surname> <given-names>LA</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Skin cancer screening: updated evidence report and systematic review for the us preventive services task force</article-title>. <source>JAMA</source>. <year>2023</year>;<volume>329</volume>(<issue>15</issue>):<fpage>1296</fpage>&#x2013;<lpage>1307</lpage>. doi:<pub-id pub-id-type="doi">10.1001/jama.2023.3262</pub-id>; <pub-id pub-id-type="pmid">37071090</pub-id></mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Alam</surname> <given-names>TM</given-names></string-name>, <string-name><surname>Shaukat</surname> <given-names>K</given-names></string-name>, <string-name><surname>Khan</surname> <given-names>WA</given-names></string-name>, <string-name><surname>Hameed</surname> <given-names>IA</given-names></string-name>, <string-name><surname>Almuqren</surname> <given-names>LA</given-names></string-name>, <string-name><surname>Raza</surname> <given-names>MA</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>An efficient deep learning-based skin cancer classifier for an imbalanced dataset</article-title>. <source>Diagnostics</source>. <year>2022</year>;<volume>12</volume>(<issue>9</issue>):<fpage>2115</fpage>. doi:<pub-id pub-id-type="doi">10.3390/diagnostics12092115</pub-id>; <pub-id pub-id-type="pmid">36140516</pub-id></mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Esteva</surname> <given-names>A</given-names></string-name>, <string-name><surname>Kuprel</surname> <given-names>B</given-names></string-name>, <string-name><surname>Novoa</surname> <given-names>RA</given-names></string-name>, <string-name><surname>Ko</surname> <given-names>J</given-names></string-name>, <string-name><surname>Swetter</surname> <given-names>SM</given-names></string-name>, <string-name><surname>Blau</surname> <given-names>HM</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Dermatologist-level classification of skin cancer with deep neural networks</article-title>. <source>Nature</source>. <year>2017</year>;<volume>542</volume>(<issue>7639</issue>):<fpage>115</fpage>&#x2013;<lpage>8</lpage>. doi:<pub-id pub-id-type="doi">10.1038/nature21056</pub-id>; <pub-id pub-id-type="pmid">28117445</pub-id></mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Bisla</surname> <given-names>D</given-names></string-name>, <string-name><surname>Choromanska</surname> <given-names>A</given-names></string-name>, <string-name><surname>Berman</surname> <given-names>RS</given-names></string-name>, <string-name><surname>Stein</surname> <given-names>JA</given-names></string-name>, <string-name><surname>Polsky</surname> <given-names>D</given-names></string-name></person-group>. <article-title>Towards automated melanoma detection with deep learning: data purification and augmentation</article-title>. In: <conf-name>2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)</conf-name>; <year>2019 Jun 16&#x2013;17</year>; <publisher-loc>Long Beach, CA, USA</publisher-loc>. p. <fpage>2720</fpage>&#x2013;<lpage>8</lpage>.</mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Akter</surname> <given-names>M</given-names></string-name>, <string-name><surname>Khatun</surname> <given-names>R</given-names></string-name>, <string-name><surname>Talukder</surname> <given-names>MA</given-names></string-name>, <string-name><surname>Islam</surname> <given-names>MM</given-names></string-name>, <string-name><surname>Uddin</surname> <given-names>MA</given-names></string-name>, <string-name><surname>Ahamed</surname> <given-names>MKU</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>An integrated deep learning model for skin cancer detection using hybrid feature fusion technique</article-title>. <source>Biomed Mater Dev</source>. <year>2025</year>;<volume>3</volume>:<fpage>1433</fpage>&#x2013;<lpage>47</lpage>.</mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Mahmud</surname> <given-names>F</given-names></string-name>, <string-name><surname>Mahfiz</surname> <given-names>MM</given-names></string-name>, <string-name><surname>Kabir</surname> <given-names>MZI</given-names></string-name>, <string-name><surname>Abdullah</surname> <given-names>Y</given-names></string-name></person-group>. <article-title>An interpretable deep learning approach for skin cancer categorization</article-title>. In: <conf-name>2023 26th International Conference on Computer and Information Technology (ICCIT)</conf-name>; <year>2023 Dec 13&#x2013;15</year>; <publisher-loc>Cox&#x2019;s Bazar, Bangladesh</publisher-loc>. p. <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Chaturvedi</surname> <given-names>SS</given-names></string-name>, <string-name><surname>Gupta</surname> <given-names>K</given-names></string-name>, <string-name><surname>Prasad</surname> <given-names>PS</given-names></string-name></person-group>. <article-title>Skin lesion analyser: an efficient seven-way multi-class skin cancer classification using mobilenet</article-title>. In: <conf-name>Advanced Machine Learning Technologies and Applications: Proceedings of AMLTA 2020</conf-name>. <publisher-loc>Cham, Switzerland</publisher-loc>: <publisher-name>Springer</publisher-name>; <year>2021</year>. p. <fpage>165</fpage>&#x2013;<lpage>176</lpage>.</mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Islam</surname> <given-names>MS</given-names></string-name>, <string-name><surname>Panta</surname> <given-names>S</given-names></string-name></person-group>. <article-title>Skin cancer images classification using transfer learning techniques</article-title>. <comment>arXiv:2406.12954</comment>. <year>2024</year>.</mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Bala</surname> <given-names>D</given-names></string-name>, <string-name><surname>Hossain</surname> <given-names>MS</given-names></string-name>, <string-name><surname>Hossain</surname> <given-names>MA</given-names></string-name>, <string-name><surname>Abdullah</surname> <given-names>MI</given-names></string-name>, <string-name><surname>Rahman</surname> <given-names>MM</given-names></string-name>, <string-name><surname>Manavalan</surname> <given-names>B</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Monkeynet: a robust deep convolutional neural network for monkeypox disease detection and classification</article-title>. <source>Neural Netw</source>. <year>2023</year>;<volume>161</volume>:<fpage>757</fpage>&#x2013;<lpage>75</lpage>. doi:<pub-id pub-id-type="doi">10.1016/j.neunet.2023.02.022</pub-id>; <pub-id pub-id-type="pmid">36848828</pub-id></mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Hameed</surname> <given-names>M</given-names></string-name>, <string-name><surname>Zameer</surname> <given-names>A</given-names></string-name>, <string-name><surname>Raja</surname> <given-names>MAZ</given-names></string-name></person-group>. <article-title>A comprehensive systematic review: advancements in skin cancer classification and segmentation using the isic dataset</article-title>. <source>Comput Model Eng Sci</source>. <year>2024</year>;<volume>140</volume>(<issue>3</issue>):<fpage>2131</fpage>&#x2013;<lpage>64</lpage>. doi:<pub-id pub-id-type="doi">10.32604/cmes.2024.050124</pub-id>.</mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Srividhya</surname> <given-names>V</given-names></string-name>, <string-name><surname>Sujatha</surname> <given-names>K</given-names></string-name>, <string-name><surname>Ponmagal</surname> <given-names>RS</given-names></string-name>, <string-name><surname>Durgadevi</surname> <given-names>G</given-names></string-name>, <string-name><surname>Madheshwaran</surname> <given-names>L</given-names></string-name></person-group>. <article-title>Vision based detection and categorization of skin lesions using deep learning neural networks</article-title>. <source>Procedia Comput Sci</source>. <year>2020</year>;<volume>171</volume>:<fpage>1726</fpage>&#x2013;<lpage>35</lpage>. doi:<pub-id pub-id-type="doi">10.1016/j.procs.2020.04.185</pub-id>.</mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Serte</surname> <given-names>S</given-names></string-name>, <string-name><surname>Demirel</surname> <given-names>H</given-names></string-name></person-group>. <article-title>Wavelet-based deep learning for skin lesion classification</article-title>. <source>IET Image Process</source>. <year>2020</year>;<volume>14</volume>(<issue>4</issue>):<fpage>720</fpage>&#x2013;<lpage>6</lpage>. doi:<pub-id pub-id-type="doi">10.1049/iet-ipr.2019.0553</pub-id>.</mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Jinnai</surname> <given-names>S</given-names></string-name>, <string-name><surname>Yamazaki</surname> <given-names>N</given-names></string-name>, <string-name><surname>Hirano</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Sugawara</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Ohe</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Hamamoto</surname> <given-names>R</given-names></string-name></person-group>. <article-title>The development of a skin cancer classification system for pigmented skin lesions using deep learning</article-title>. <source>Biomolecules</source>. <year>2020</year>;<volume>10</volume>(<issue>8</issue>):<fpage>1123</fpage>. doi:<pub-id pub-id-type="doi">10.3390/biom10081123</pub-id>; <pub-id pub-id-type="pmid">32751349</pub-id></mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Mankawade</surname> <given-names>A</given-names></string-name>, <string-name><surname>Bodhankar</surname> <given-names>A</given-names></string-name>, <string-name><surname>Mahajan</surname> <given-names>A</given-names></string-name>, <string-name><surname>Prasad</surname> <given-names>D</given-names></string-name>, <string-name><surname>Mahajan</surname> <given-names>S</given-names></string-name>, <string-name><surname>Dhakalkar</surname> <given-names>R</given-names></string-name></person-group>. <article-title>Skin cancer detection and intensity calculator using deep learning</article-title>. In: <conf-name>2023 International Conference for Advancement in Technology (ICONAT)</conf-name>; <year>2023 Jan 24&#x2013;26</year>; <publisher-loc>Goa, India</publisher-loc>. p. <fpage>1</fpage>&#x2013;<lpage>4</lpage>.</mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Afrifa</surname> <given-names>S</given-names></string-name>, <string-name><surname>Varadarajan</surname> <given-names>V</given-names></string-name>, <string-name><surname>Appiahene</surname> <given-names>P</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>T</given-names></string-name>, <string-name><surname>Gyamfi</surname> <given-names>D</given-names></string-name>, <string-name><surname>Gyening</surname> <given-names>RMOM</given-names></string-name></person-group>. <article-title>Deep neural networks for skin cancer classification: analysis of melanoma cancer data</article-title>. <source>J Adv Inf Technol</source>. <year>2025</year>;<volume>16</volume>(<issue>1</issue>).</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Yilmaz</surname> <given-names>A</given-names></string-name>, <string-name><surname>Kalebasi</surname> <given-names>M</given-names></string-name>, <string-name><surname>Samoylenko</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Guvenilir</surname> <given-names>ME</given-names></string-name>, <string-name><surname>Uvet</surname> <given-names>H</given-names></string-name></person-group>. <article-title>Benchmarking of lightweight deep learning architectures for skin cancer classification using isic 2017 dataset</article-title>. <comment>arXiv:2110.12270</comment>. <year>2021</year>.</mixed-citation></ref>
<ref id="ref-17"><label>[17]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>El-Soud</surname> <given-names>MWA</given-names></string-name>, <string-name><surname>Gaber</surname> <given-names>T</given-names></string-name>, <string-name><surname>Tahoun</surname> <given-names>M</given-names></string-name>, <string-name><surname>Alourani</surname> <given-names>A</given-names></string-name></person-group>. <article-title>An enhanced deep learning method for skin cancer detection and classification</article-title>. <source>Comput Mater Contin</source>. <year>2022</year>;<volume>73</volume>(<issue>1</issue>):<fpage>1109</fpage>&#x2013;<lpage>23</lpage>. doi:<pub-id pub-id-type="doi">10.32604/cmc.2022.028561</pub-id>.</mixed-citation></ref>
<ref id="ref-18"><label>[18]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Thwin</surname> <given-names>SM</given-names></string-name>, <string-name><surname>Park</surname> <given-names>H-S</given-names></string-name>, <string-name><surname>Seo</surname> <given-names>SH</given-names></string-name></person-group>. <article-title>A trustworthy framework for skin cancer detection using a cnn with a modified attention mechanism</article-title>. <source>Appl Sci</source>. <year>2025</year>;<volume>15</volume>(<issue>3</issue>):<fpage>1067</fpage>. doi:<pub-id pub-id-type="doi">10.3390/app15031067</pub-id>.</mixed-citation></ref>
<ref id="ref-19"><label>[19]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Hamsalekha</surname> <given-names>R</given-names></string-name>, <string-name><surname>Devadhas</surname> <given-names>G</given-names></string-name>, <string-name><surname>Satheesha</surname> <given-names>TY</given-names></string-name></person-group>. <article-title>A novel deep learning approach for automated melanoma classification using hybrid cnn and vision transformer model</article-title>. <source>Fusion Pract Appl</source>. <year>2025</year>;<volume>254</volume>:<fpage>92</fpage>&#x2013;<lpage>101</lpage>. doi:<pub-id pub-id-type="doi">10.1109/gcat62922.2024.10923859</pub-id>.</mixed-citation></ref>
<ref id="ref-20"><label>[20]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Nawaz</surname> <given-names>K</given-names></string-name>, <string-name><surname>Zanib</surname> <given-names>A</given-names></string-name>, <string-name><surname>Shabir</surname> <given-names>I</given-names></string-name>, <string-name><surname>Li</surname> <given-names>J</given-names></string-name>, <string-name><surname>Wang</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Mahmood</surname> <given-names>T</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Skin cancer detection using dermoscopic images with convolutional neural network</article-title>. <source>Sci Rep</source>. <year>2025</year>;<volume>15</volume>(<issue>1</issue>):<fpage>7252</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-025-91446-6</pub-id>; <pub-id pub-id-type="pmid">40021731</pub-id></mixed-citation></ref>
<ref id="ref-21"><label>[21]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Bello</surname> <given-names>A</given-names></string-name>, <string-name><surname>Ng</surname> <given-names>S-C</given-names></string-name>, <string-name><surname>Leung</surname> <given-names>M-F</given-names></string-name></person-group>. <article-title>Skin cancer classification using fine-tuned transfer learning of densenet-121</article-title>. <source>Appl Sci</source>. <year>2024</year>;<volume>14</volume>(<issue>17</issue>):<fpage>7707</fpage>. doi:<pub-id pub-id-type="doi">10.3390/app14177707</pub-id>.</mixed-citation></ref>
<ref id="ref-22"><label>[22]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Kaggle</surname></string-name> <string-name> <given-names>UK</given-names></string-name></person-group>. <article-title>Skin cancer mnist: Ham10000 [Internet]</article-title>. <comment>[cited 2025 Feb 17]</comment>. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000">https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000</ext-link>.</mixed-citation></ref>
<ref id="ref-23"><label>[23]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Radford</surname> <given-names>A</given-names></string-name>, <string-name><surname>Metz</surname> <given-names>L</given-names></string-name>, <string-name><surname>Chintala</surname> <given-names>S</given-names></string-name></person-group>. <article-title>Unsupervised representation learning with deep convolutional generative adversarial networks</article-title>. <comment>arXiv:1511.06434</comment>. <year>2016</year>.</mixed-citation></ref>
<ref id="ref-24"><label>[24]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Karras</surname> <given-names>T</given-names></string-name>, <string-name><surname>Laine</surname> <given-names>S</given-names></string-name>, <string-name><surname>Aila</surname> <given-names>T</given-names></string-name></person-group>. <article-title>A style-based generator architecture for generative adversarial networks</article-title>. <comment>arXiv:1812.04948</comment>. <year>2019</year>.</mixed-citation></ref>
<ref id="ref-25"><label>[25]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Monica</surname> <given-names>KM</given-names></string-name>, <string-name><surname>Shreeharsha</surname> <given-names>J</given-names></string-name>, <string-name><surname>Falkowski-Gilski</surname> <given-names>P</given-names></string-name>, <string-name><surname>Falkowska-Gilska</surname> <given-names>B</given-names></string-name>, <string-name><surname>Awasthy</surname> <given-names>M</given-names></string-name>, <string-name><surname>Phadke</surname> <given-names>R</given-names></string-name></person-group>. <article-title>Melanoma skin cancer detection using mask-rcnn with modified gru model</article-title>. <source>Front Physiol</source>. <year>2024</year>;<volume>14</volume>:<fpage>1324042</fpage>. doi:<pub-id pub-id-type="doi">10.3389/fphys.2023.1324042</pub-id>; <pub-id pub-id-type="pmid">38292449</pub-id></mixed-citation></ref>
<ref id="ref-26"><label>[26]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Reis</surname> <given-names>HC</given-names></string-name>, <string-name><surname>Turk</surname> <given-names>V</given-names></string-name></person-group>. <article-title>Fusion of transformer attention and cnn features for skin cancer detection</article-title>. <source>Appl Soft Comput</source>. <year>2024</year>;<volume>164</volume>:<fpage>112013</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.asoc.2024.112013</pub-id>.</mixed-citation></ref>
<ref id="ref-27"><label>[27]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhao</surname> <given-names>X</given-names></string-name>, <string-name><surname>Wang</surname> <given-names>L</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Han</surname> <given-names>X</given-names></string-name>, <string-name><surname>Deveci</surname> <given-names>M</given-names></string-name>, <string-name><surname>Parmar</surname> <given-names>M</given-names></string-name></person-group>. <article-title>A review of convolutional neural networks in computer vision</article-title>. <source>Artif Intell Rev</source>. <year>2024</year>;<volume>57</volume>(<issue>4</issue>):<fpage>99</fpage>. doi:<pub-id pub-id-type="doi">10.1007/s10462-024-10721-6</pub-id>.</mixed-citation></ref>
<ref id="ref-28"><label>[28]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Sarker</surname> <given-names>PK</given-names></string-name>, <string-name><surname>Zhao</surname> <given-names>Q</given-names></string-name>, <string-name><surname>Uddin</surname> <given-names>MK</given-names></string-name></person-group>. <article-title>Transformer-based person re-identification: a comprehensive review</article-title>. <source>IEEE Trans Intell Vehicles</source>. <year>2024</year>;<volume>9</volume>(<issue>7</issue>):<fpage>5222</fpage>&#x2013;<lpage>39</lpage>. doi:<pub-id pub-id-type="doi">10.1109/tiv.2024.3350669</pub-id>.</mixed-citation></ref>
<ref id="ref-29"><label>[29]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Chen</surname> <given-names>G</given-names></string-name></person-group>. <article-title>An interpretable composite cnn and gru for fine-grained martial arts motion modeling using big data analytics and machine learning</article-title>. <source>Soft Comput</source>. <year>2024</year>;<volume>28</volume>(<issue>3</issue>):<fpage>2223</fpage>&#x2013;<lpage>43</lpage>. doi:<pub-id pub-id-type="doi">10.1007/s00500-023-09565-z</pub-id>.</mixed-citation></ref>
<ref id="ref-30"><label>[30]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Natha</surname> <given-names>P</given-names></string-name>, <string-name><surname>Tera</surname> <given-names>SP</given-names></string-name>, <string-name><surname>Chinthaginjala</surname> <given-names>R</given-names></string-name>, <string-name><surname>Rab</surname> <given-names>SO</given-names></string-name>, <string-name><surname>Narasimhulu</surname> <given-names>CV</given-names></string-name>, <string-name><surname>Kim</surname> <given-names>TH</given-names></string-name></person-group>. <article-title>Boosting skin cancer diagnosis accuracy with ensemble approach</article-title>. <source>Sci Rep</source>. <year>2025</year>;<volume>15</volume>(<issue>1</issue>):<fpage>1290</fpage>. doi:<pub-id pub-id-type="doi">10.1038/s41598-024-84864-5</pub-id>; <pub-id pub-id-type="pmid">39779772</pub-id></mixed-citation></ref>
<ref id="ref-31"><label>[31]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Maher</surname> <given-names>RS</given-names></string-name>, <string-name><surname>Bawiskar</surname> <given-names>S</given-names></string-name></person-group>. <article-title>A comparative study of the performance of different machine learning algorithms in skin cancer classification</article-title>. <source>High Technol Lett</source>. <year>2023</year>;<volume>29</volume>(<issue>9</issue>):<fpage>429</fpage>&#x2013;<lpage>33</lpage>.</mixed-citation></ref>
<ref id="ref-32"><label>[32]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ercal</surname> <given-names>F</given-names></string-name>, <string-name><surname>Chawla</surname> <given-names>A</given-names></string-name>, <string-name><surname>Stoecker</surname> <given-names>WV</given-names></string-name>, <string-name><surname>Lee</surname> <given-names>H-C</given-names></string-name>, <string-name><surname>Moss</surname> <given-names>RH</given-names></string-name></person-group>. <article-title>Neural network diagnosis of malignant melanoma from color images</article-title>. <source>IEEE Trans Biomed Eng</source>. <year>1994</year>;<volume>41</volume>(<issue>9</issue>):<fpage>837</fpage>&#x2013;<lpage>45</lpage>. doi:<pub-id pub-id-type="doi">10.1109/10.312091</pub-id>; <pub-id pub-id-type="pmid">7959811</pub-id></mixed-citation></ref>
<ref id="ref-33"><label>[33]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Dildar</surname> <given-names>M</given-names></string-name>, <string-name><surname>Akram</surname> <given-names>S</given-names></string-name>, <string-name><surname>Irfan</surname> <given-names>M</given-names></string-name>, <string-name><surname>Khan</surname> <given-names>HU</given-names></string-name>, <string-name><surname>Ramzan</surname> <given-names>M</given-names></string-name>, <string-name><surname>Mahmood</surname> <given-names>AR</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Skin cancer detection: a review using deep learning techniques</article-title>. <source>Int J Environ Res Public Health</source>. <year>2021</year>;<volume>18</volume>(<issue>10</issue>):<fpage>5479</fpage>. doi:<pub-id pub-id-type="doi">10.3390/ijerph18105479</pub-id>; <pub-id pub-id-type="pmid">34065430</pub-id></mixed-citation></ref>
<ref id="ref-34"><label>[34]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Aboulmira</surname> <given-names>A</given-names></string-name>, <string-name><surname>Hrimech</surname> <given-names>H</given-names></string-name>, <string-name><surname>Lachgar</surname> <given-names>M</given-names></string-name>, <string-name><surname>Hanine</surname> <given-names>M</given-names></string-name>, <string-name><surname>Garcia</surname> <given-names>CO</given-names></string-name>, <string-name><surname>Mezquita</surname> <given-names>GM</given-names></string-name>, <etal>et al.</etal></person-group> <article-title>Hybrid model with wavelet decomposition and efficientnet for accurate skin cancer classification</article-title>. <source>J Cancer</source>. <year>2025</year>;<volume>16</volume>(<issue>2</issue>):<fpage>506</fpage>. doi:<pub-id pub-id-type="doi">10.7150/jca.101574</pub-id>; <pub-id pub-id-type="pmid">39744476</pub-id></mixed-citation></ref>
<ref id="ref-35"><label>[35]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Rao</surname> <given-names>PVV</given-names></string-name></person-group>. <article-title>Skin cancer detection</article-title>. <source>Int J Res Appl Sci Eng Technol</source>. <year>2024</year>;<volume>12</volume>(<issue>4</issue>):<fpage>364</fpage>. doi:<pub-id pub-id-type="doi">10.22214/ijraset.2024.59725</pub-id>.</mixed-citation></ref>
<ref id="ref-36"><label>[36]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ali</surname> <given-names>K</given-names></string-name>, <string-name><surname>Shaikh</surname> <given-names>ZA</given-names></string-name>, <string-name><surname>Khan</surname> <given-names>AA</given-names></string-name>, <string-name><surname>Laghari</surname> <given-names>AA</given-names></string-name></person-group>. <article-title>Multiclass skin cancer classification using efficientnets&#x2014;a first step towards preventing skin cancer</article-title>. <source>Neurosci Inform</source>. <year>2022</year>;<volume>2</volume>(<issue>4</issue>):<fpage>100034</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.neuri.2021.100034</pub-id>.</mixed-citation></ref>
<ref id="ref-37"><label>[37]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Bechelli</surname> <given-names>S</given-names></string-name>, <string-name><surname>Delhommelle</surname> <given-names>J</given-names></string-name></person-group>. <article-title>Machine learning and deep learning algorithms for skin cancer classification from dermoscopic images</article-title>. <source>Bioengineering</source>. <year>2022</year>;<volume>9</volume>(<issue>3</issue>):<fpage>97</fpage>. doi:<pub-id pub-id-type="doi">10.3390/bioengineering9030097</pub-id>; <pub-id pub-id-type="pmid">35324786</pub-id></mixed-citation></ref>
<ref id="ref-38"><label>[38]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Gururaj</surname> <given-names>HL</given-names></string-name>, <string-name><surname>Manju</surname> <given-names>N</given-names></string-name>, <string-name><surname>Nagarjun</surname> <given-names>A</given-names></string-name>, <string-name><surname>Aradhya</surname> <given-names>VNM</given-names></string-name>, <string-name><surname>Flammini</surname> <given-names>F</given-names></string-name></person-group>. <article-title>Deepskin: a deep learning approach for skin cancer classification</article-title>. <source>IEEE Access</source>. <year>2023</year>;<volume>11</volume>:<fpage>50205</fpage>&#x2013;<lpage>14</lpage>. doi:<pub-id pub-id-type="doi">10.1109/access.2023.3274848</pub-id>.</mixed-citation></ref>
<ref id="ref-39"><label>[39]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Javaid</surname> <given-names>A</given-names></string-name>, <string-name><surname>Sadiq</surname> <given-names>M</given-names></string-name>, <string-name><surname>Akram</surname> <given-names>F</given-names></string-name></person-group>. <article-title>Skin cancer classification using image processing and machine learning</article-title>. In: <conf-name>2021 International Bhurban Conference on Applied Sciences and Technologies (IBCAST)</conf-name>; <year>2021 Jan 12&#x2013;16</year>; <publisher-loc>Islamabad, Pakistan</publisher-loc>. p. <fpage>439</fpage>&#x2013;<lpage>44</lpage>.</mixed-citation></ref>
<ref id="ref-40"><label>[40]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Liu</surname> <given-names>Z</given-names></string-name>, <string-name><surname>Lin</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Cao</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Hu</surname> <given-names>H</given-names></string-name>, <string-name><surname>Wei</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>Z</given-names></string-name>, <etal>et al.</etal></person-group> <article-title>Swin transformer: hierarchical vision transformer using shifted windows</article-title>. In: <conf-name>Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)</conf-name>; <year>2021 Oct 10&#x2013;17</year>; <publisher-loc>Montreal, QC, Canada</publisher-loc>. p. <fpage>9992</fpage>&#x2013;<lpage>10002</lpage>.</mixed-citation></ref>
<ref id="ref-41"><label>[41]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Liu</surname> <given-names>Z</given-names></string-name>, <string-name><surname>Mao</surname> <given-names>H</given-names></string-name>, <string-name><surname>Wu</surname> <given-names>C-Y</given-names></string-name>, <string-name><surname>Feichtenhofer</surname> <given-names>C</given-names></string-name>, <string-name><surname>Darrell</surname> <given-names>T</given-names></string-name>, <string-name><surname>Xie</surname> <given-names>S</given-names></string-name></person-group>. <article-title>A convnet for the 2020s</article-title>. In: <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>; <year>2022 Jun 18&#x2013;24</year>; <publisher-loc>New Orleans, LA, USA</publisher-loc>. p. <fpage>11966</fpage>&#x2013;<lpage>76</lpage>.</mixed-citation></ref>
<ref id="ref-42"><label>[42]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Nivedha</surname> <given-names>S</given-names></string-name>, <string-name><surname>Shankar</surname> <given-names>S</given-names></string-name></person-group>. <article-title>Melanoma diagnosis using enhanced faster region convolutional neural networks optimized by artificial gorilla troops algorithm</article-title>. <source>Inf Technol Control</source>. <year>2023</year>;<volume>52</volume>(<issue>4</issue>):<fpage>819</fpage>&#x2013;<lpage>32</lpage>. doi:<pub-id pub-id-type="doi">10.5755/j01.itc.52.4.33503</pub-id>.</mixed-citation></ref>
<ref id="ref-43"><label>[43]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ali</surname> <given-names>A</given-names></string-name>, <string-name><surname>Shahbaz</surname> <given-names>H</given-names></string-name>, <string-name><surname>Dama&#x0161;evi&#x010D;ius</surname> <given-names>R</given-names></string-name></person-group>. <article-title>xCViT: improved vision transformer network with fusion of cnn and xception for skin disease recognition with explainable ai</article-title>. <source>Comput Mater Contin</source>. <year>2025</year>;<volume>83</volume>(<issue>1</issue>):<fpage>1367</fpage>&#x2013;<lpage>98</lpage>. doi:<pub-id pub-id-type="doi">10.32604/cmc.2025.059301</pub-id>.</mixed-citation></ref>
<ref id="ref-44"><label>[44]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Shahadha</surname> <given-names>RS</given-names></string-name>, <string-name><surname>Al-Khateeb</surname> <given-names>B</given-names></string-name></person-group>. <article-title>Dual convolutional neural network for skin cancer classification</article-title>. <source>J Cybersecur Inform Manage (JCIM)</source>. <year>2025</year>;<volume>15</volume>(<issue>2</issue>):<fpage>35</fpage>&#x2013;<lpage>42</lpage>.</mixed-citation></ref>
<ref id="ref-45"><label>[45]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Nguyen</surname> <given-names>ATP</given-names></string-name>, <string-name><surname>Jewel</surname> <given-names>RM</given-names></string-name>, <string-name><surname>Akter</surname> <given-names>A</given-names></string-name></person-group>. <article-title>Comparative analysis of machine learning models for automated skin cancer detection: advancements in diagnostic accuracy and ai integration</article-title>. <source>Am J Med Sci Pharm Res</source>. <year>2025</year>;<volume>7</volume>(<issue>1</issue>):<fpage>15</fpage>&#x2013;<lpage>26</lpage>.</mixed-citation></ref>
<ref id="ref-46"><label>[46]</label><mixed-citation publication-type="book"><person-group person-group-type="author"><string-name><surname>Hwang</surname> <given-names>CL</given-names></string-name>, <string-name><surname>Yoon</surname> <given-names>K</given-names></string-name></person-group>. <source>Multiple attribute decision making: methods and applications: a state-of-the-art survey</source>. <publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Springer-Verlag</publisher-name>; <year>2012</year>.</mixed-citation></ref>
</ref-list>
</back></article>











