<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xml:lang="en" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMC</journal-id>
<journal-id journal-id-type="nlm-ta">CMC</journal-id>
<journal-id journal-id-type="publisher-id">CMC</journal-id>
<journal-title-group>
<journal-title>Computers, Materials &#x0026; Continua</journal-title>
</journal-title-group>
<issn pub-type="epub">1546-2226</issn>
<issn pub-type="ppub">1546-2218</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">67381</article-id>
<article-id pub-id-type="doi">10.32604/cmc.2025.067381</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Integration of YOLOv11 and Histogram Equalization for Fire and Smoke-Based Detection of Forest and Land Fires</article-title>
<alt-title alt-title-type="left-running-head">Integration of YOLOv11 and Histogram Equalization for Fire and Smoke-Based Detection of Forest and Land Fires</alt-title>
<alt-title alt-title-type="right-running-head">Integration of YOLOv11 and Histogram Equalization for Fire and Smoke-Based Detection of Forest and Land Fires</alt-title>
</title-group>
<contrib-group>
<contrib id="author-1" contrib-type="author">
<name name-style="western"><surname>Dewi</surname><given-names>Christine</given-names></name><xref ref-type="aff" rid="aff-1">1</xref><xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western"><surname>Santoso</surname><given-names>Melati Viaeritas Vitrieco</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western"><surname>Chernovita</surname><given-names>Hanna Prillysca</given-names></name><xref ref-type="aff" rid="aff-3">3</xref></contrib>
<contrib id="author-4" contrib-type="author">
<name name-style="western"><surname>Mailoa</surname><given-names>Evangs</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western"><surname>Philemon</surname><given-names>Stephen Abednego</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-6" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Chen</surname><given-names>Abbott Po Shun</given-names></name><xref ref-type="aff" rid="aff-4">4</xref><email>chprosen@gm.cyut.edu.tw</email></contrib>
<aff id="aff-1"><label>1</label><institution>Department of Information Technology, Satya Wacana Christian University</institution>, <addr-line>Jalan Diponegoro No. 52-60, Salatiga, 50711</addr-line>, <country>Indonesia</country></aff>
<aff id="aff-2"><label>2</label><institution>School of Information Technology, Deakin University, 221 Burwood Highway</institution>, <addr-line>Burwood, VIC 3125</addr-line>, <country>Australia</country></aff>
<aff id="aff-3"><label>3</label><institution>Department of Information Systems, Satya Wacana Christian University</institution>, <country>Jalan Diponegoro No. 52-60</country>, <addr-line>Salatiga, 50711</addr-line>, <country>Indonesia</country></aff>
<aff id="aff-4"><label>4</label><institution>Department of Marketing and Logistics Management, Chaoyang University of Technology, 168 Jifeng East Road</institution>, <addr-line>Taichung City, 413310</addr-line>, <country>Taiwan</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Author: Abbott Po Shun Chen. Email: <email>chprosen@gm.cyut.edu.tw</email></corresp>
</author-notes>
<pub-date date-type="collection" publication-format="electronic">
<year>2025</year>
</pub-date>
<pub-date date-type="pub" publication-format="electronic">
<day>30</day><month>07</month><year>2025</year>
</pub-date>
<volume>84</volume>
<issue>3</issue>
<fpage>5361</fpage>
<lpage>5379</lpage>
<history>
<date date-type="received">
<day>01</day>
<month>5</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>18</day>
<month>6</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 The Authors.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Published by Tech Science Press.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMC_67381.pdf"></self-uri>
<abstract>
<p>Early detection of Forest and Land Fires (FLF) is essential to prevent the rapid spread of fire as well as minimize environmental damage. However, accurate detection under real-world conditions, such as low light, haze, and complex backgrounds, remains a challenge for computer vision systems. This study evaluates the impact of three image enhancement techniques&#x2014;Histogram Equalization (HE), Contrast Limited Adaptive Histogram Equalization (CLAHE), and a hybrid method called DBST-LCM CLAHE&#x2014;on the performance of the YOLOv11 object detection model in identifying fires and smoke. The D-Fire dataset, consisting of 21,527 annotated images captured under diverse environmental scenarios and illumination levels, was used to train and evaluate the model. Each enhancement method was applied to the dataset before training. Model performance was assessed using multiple metrics, including Precision, Recall, mean Average Precision at 50% IoU (mAP50), F1-score, and visual inspection through bounding box results. Experimental results show that all three enhancement techniques improved detection performance. HE yielded the highest mAP50 score of 0.771, along with a balanced precision of 0.784 and recall of 0.703, demonstrating strong generalization across different conditions. DBST-LCM CLAHE achieved the highest Precision score of 79%, effectively reducing false positives, particularly in scenes with dispersed smoke or complex textures. CLAHE, with slightly lower overall metrics, contributed to improved local feature detection. Each technique showed distinct advantages: HE enhanced global contrast; CLAHE improved local structure visibility; and DBST-LCM CLAHE provided an optimal balance through dynamic block sizing and local contrast preservation. These results underline the importance of selecting preprocessing methods according to detection priorities, such as minimizing false alarms or maximizing completeness. 
This research does not propose a new model architecture but rather benchmarks a recent lightweight detector, YOLOv11, combined with image enhancement strategies for practical deployment in FLF monitoring. The findings support the integration of preprocessing techniques to improve detection accuracy, offering a foundation for real-time FLF detection systems on edge devices or drones, particularly in regions like Indonesia.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Histogram equalization</kwd>
<kwd>YOLO</kwd>
<kwd>forest and land fire detection</kwd>
<kwd>deep learning</kwd>
</kwd-group>
<funding-group>
<award-group id="awg1">
<funding-source>Directorate of Research, Technology, and Community Service, Ministry of Higher Education, Science, and Technology of the Republic of Indonesia under the Regular Fundamental Research scheme</funding-source>
<award-id>001/LL6/PL/AL.04/2025</award-id>
<award-id>011/SPK-PFR/RIK/05/2025</award-id>
</award-group>
</funding-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>The Eaton Fire, more widely known as the LA Wildfire&#x2014;a forest fire incident that occurred in early 2025&#x2014;has so far claimed 30 lives and destroyed up to 15,000 buildings. This incident highlights the importance of early detection and mitigation of forest and land fires (hereafter referred to as FLF). As of this year, FLF has burned 126 hectares of land across Indonesia, causing significant ecological and economic impacts [<xref ref-type="bibr" rid="ref-1">1</xref>]. Whether caused by extreme weather and drought or by human activities like uncontrolled burning to clear land, FLF can result in environmental degradation, increased greenhouse gas emissions, and health issues due to the resulting smoke.</p>
<p>Early detection of forest and land fires is crucial for mitigating and preventing such disasters. Small sources of fire must be detected quickly before they spread and turn into large-scale wildfires. Delays in detection can accelerate fire spread. An effective monitoring system for FLF can enhance awareness and preparation in reducing post-disaster impacts. In turn, this will also support the sustainability of ecosystems for the future.</p>
<p>By utilizing deep learning models, <xref ref-type="table" rid="table-1">Table 1</xref> shows that various studies have developed early detection systems for fire and smoke, for example, using YOLOv4 to YOLOv8, as well as other variants. However, among these systems, there are still two main aspects that need improvement&#x2014;detection speed and accuracy, especially in recognizing fire and smoke. The optimization of YOLOv5 up to YOLOv11 for real-time detection has been explored in several studies [<xref ref-type="bibr" rid="ref-2">2</xref>&#x2013;<xref ref-type="bibr" rid="ref-4">4</xref>]. Some researchers have also combined the YOLO model with other techniques (such as filter pruning [<xref ref-type="bibr" rid="ref-5">5</xref>]) to reduce computational load on low-power devices.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Summary of FLF detection methods in the literature</title>
</caption>
<table>
<colgroup>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th align="center">Author</th>
<th align="center">Method</th>
<th align="center">Contribution</th>
<th align="center">Limitation</th>
</tr>
</thead>
<tbody>
<tr>
<td>Goncalves et al. [<xref ref-type="bibr" rid="ref-2">2</xref>], 2024</td>
<td>YOLOv5, 7, and 8 models</td>
<td>Detection speed &#x0026; smoke localization</td>
<td>Detection of small areas with visual disturbances (fog, clouds, sun)</td>
</tr>
<tr>
<td>Wicaksono [<xref ref-type="bibr" rid="ref-3">3</xref>], 2024</td>
<td>YOLOv8 model</td>
<td>Real-time detection efficiency &#x0026; accuracy</td>
<td>Effectiveness in real-time field conditions</td>
</tr>
<tr>
<td>Ven&#x00E2;ncio et al. [<xref ref-type="bibr" rid="ref-5">5</xref>], 2022</td>
<td>Combination of YOLOv4 model and pruning filter</td>
<td>Computational load (83.6%) &#x0026; memory (83.86%) efficiency on low-power devices</td>
<td>Accuracy of detecting multiple fire points in complex scenarios</td>
</tr>
<tr>
<td>Pan et al. [<xref ref-type="bibr" rid="ref-6">6</xref>], 2021</td>
<td>Distillation approach in CNN</td>
<td>Lightweight Faster R-CNN framework</td>
<td>Segmentation accuracy compared to pixel labels</td>
</tr>
<tr>
<td>Perrolas et al. [<xref ref-type="bibr" rid="ref-7">7</xref>], 2022</td>
<td>Segmentation-classification with SqueezeNet &#x002B; U-Net and quad-tree</td>
<td>Accuracy in segmenting small fire areas &#x0026; fire localization</td>
<td>Effectiveness on other image types &#x0026; accuracy in large fire areas being roughly segmented</td>
</tr>
<tr>
<td>Tingting Li et al. [<xref ref-type="bibr" rid="ref-8">8</xref>], 2022</td>
<td>Adversarial fusion network (AFN) &#x0026; domain-adversarial neural network (DANN)</td>
<td>Optimized detection of small smoke in complex scenarios</td>
<td>Detection accuracy in complex scenarios &#x0026; data variability due to lack of feature detail</td>
</tr>
<tr>
<td>Li et al. [<xref ref-type="bibr" rid="ref-9">9</xref>], 2025</td>
<td>YOLOv8 model optimization with SE-ResNeXt &#x0026; Focal-SIoU</td>
<td>Optimized detection of varying fire sizes</td>
<td>High computational cost for low-power devices</td>
</tr>
<tr>
<td>Muksimova et al. [<xref ref-type="bibr" rid="ref-10">10</xref>], 2024</td>
<td>Miti-DETR with modified AlexNet backbone &#x0026; HE data</td>
<td>Effective feature extraction &#x0026; training stability</td>
<td>Real-time detection effectiveness &#x0026; non-fire scenarios</td>
</tr>
<tr>
<td>Farhan et al. [<xref ref-type="bibr" rid="ref-11">11</xref>], 2022</td>
<td>YOLOv4 model &#x0026; HE data</td>
<td>Accuracy &#x0026; speed of fire detection</td>
<td>Detection stability is affected by input resolution</td>
</tr>
<tr>
<td>Ayumi et al. [<xref ref-type="bibr" rid="ref-12">12</xref>], 2024</td>
<td>Xception &#x0026; MobileNet models and CLAHE data</td>
<td>Fire and smoke detection accuracy</td>
<td>Detection of data variations in complex real-time scenarios</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The novelty of this research does not lie in proposing a fundamentally new detection architecture or image enhancement technique, but rather in conducting a comprehensive empirical evaluation of the recently launched YOLOv11 model combined with selected image enhancement techniques such as Histogram Equalization (HE) [<xref ref-type="bibr" rid="ref-11">11</xref>], Contrast Limited Adaptive Histogram Equalization (CLAHE) [<xref ref-type="bibr" rid="ref-12">12</xref>], and the DBST-LCM CLAHE method proposed by Chakraverti et al. [<xref ref-type="bibr" rid="ref-13">13</xref>].</p>
<p>Prior works in forest fire detection have focused on various improvements across model design, training strategies, and preprocessing methods. For example, Pan et al. [<xref ref-type="bibr" rid="ref-6">6</xref>] developed a lightweight Faster R-CNN via distillation, Ven&#x00E2;ncio et al. [<xref ref-type="bibr" rid="ref-5">5</xref>] optimized YOLOv4 pruning for low-power devices, and Muksimova et al. [<xref ref-type="bibr" rid="ref-10">10</xref>] integrated Miti-DETR with HE to improve feature extraction and training stability. Other studies enhanced detection speed and real-time efficiency using YOLOv5, YOLOv7, YOLOv8 [<xref ref-type="bibr" rid="ref-2">2</xref>,<xref ref-type="bibr" rid="ref-3">3</xref>], and SE-ResNeXt backbones [<xref ref-type="bibr" rid="ref-9">9</xref>].</p>
<p>However, challenges remain in balancing computational efficiency, detection accuracy, and robustness under complex real-world conditions, especially with smoke and small fire regions amid environmental disturbances like fog and varying illumination. The recent work of [<xref ref-type="bibr" rid="ref-14">14</xref>] introduces an adaptive hierarchical multi-headed CNN with a modified convolutional block attention mechanism to improve aerial forest fire detection precision. Although this method shows promising accuracy, its high computational complexity may limit practical real-time deployment on resource-constrained platforms.</p>
<p>In contrast, this study prioritizes evaluating YOLOv11&#x2019;s performance when combined with different image enhancement methods, including HE [<xref ref-type="bibr" rid="ref-11">11</xref>], CLAHE [<xref ref-type="bibr" rid="ref-12">12</xref>], and DBST-LCM CLAHE [<xref ref-type="bibr" rid="ref-13">13</xref>], to improve detection precision and robustness while maintaining computational efficiency suitable for real-time wildfire detection. The empirical results highlight YOLOv11&#x2019;s strong potential as an efficient single-shot detector, with DBST-LCM CLAHE showing superior precision by adaptively enhancing local contrast without excessive noise amplification, advancing beyond traditional HE and CLAHE techniques.</p>
<p>Thus, this work contributes valuable benchmarking and practical insights into how state-of-the-art detection models can be effectively paired with image enhancement preprocessing to address forest and land fire detection challenges in realistic environments, complementing existing research focused on architectural innovations. Unlike recent methods that focus primarily on model complexity and attention mechanisms [<xref ref-type="bibr" rid="ref-10">10</xref>&#x2013;<xref ref-type="bibr" rid="ref-12">12</xref>], our study explores a lightweight yet effective combination of modern object detection and preprocessing techniques to achieve a balance between accuracy and real-time feasibility.</p>
<p>The structure of this report is as follows: the theoretical foundation of this study will be explained in Related Work. The materials and methods used will then be discussed in <xref ref-type="sec" rid="s3">Section 3</xref>, Methodology. <xref ref-type="sec" rid="s4">Section 4</xref>&#x2014;Results and Discussions&#x2014;will present the experimental results, analysis, comparisons, and insights related to FLF detection. Finally, <xref ref-type="sec" rid="s5">Section 5</xref>, Conclusions, will summarize the findings and explore potential directions for future research.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related Work</title>
<sec id="s2_1">
<label>2.1</label>
<title>Forest and Land Fires (FLF) Detection</title>
<p>The process of identifying and recognizing early indicators of FLF, such as localized temperature spikes and the appearance of smoke from fire sources, through the use of visual analysis technology is commonly referred to as FLF detection. By analyzing images to assess the size and density of fire and smoke in forest and land areas, FLF detection has become a critical component of early disaster warning systems and global forest conservation. Indonesian FLF Prevention Patrol System [<xref ref-type="bibr" rid="ref-15">15</xref>] serves as a real-world example of early FLF detection implementation.</p>
<p>Li et al., in their study, combined AFN and DANN models to improve the detection accuracy of small-sized smoke in complex forest scenarios [<xref ref-type="bibr" rid="ref-8">8</xref>]. This method showed significant improvements in both detection accuracy and generalization ability, particularly in reducing false alarms. Such capability is critical to ensure detection systems can identify potential fires even from the early appearance of smoke before it spreads and becomes uncontrollable. This is especially crucial in tropical forests, like those in Indonesia, where vegetation is dense.</p>
<p>In addition, the YOLO model has become one of the most widely adopted techniques for developing real-time FLF detection systems [<xref ref-type="bibr" rid="ref-16">16</xref>]. Utilizing YOLOv5, v7, and v8 for smoke localization, Goncalves et al. [<xref ref-type="bibr" rid="ref-2">2</xref>] still encountered challenges in detecting small fires under visually disturbed conditions caused by haze and sunlight. Caixiong Li et al. [<xref ref-type="bibr" rid="ref-9">9</xref>] further explored YOLOv8&#x2019;s capability in recognizing varying fire sizes, although the method required high computational resources. This can hinder FLF detection efficiency in real-time scenarios with limited computing power.</p>
<p>On the other hand, Ven&#x00E2;ncio et al. [<xref ref-type="bibr" rid="ref-5">5</xref>] addressed this issue by combining YOLOv4 with pruning filters, which demonstrated YOLO&#x2019;s strong potential in FLF detection. However, this still needs further testing under underexplored data conditions. A clear example would be maximizing early-stage fire detection when smoke or fire visuals are faint and scattered, while simultaneously avoiding false alarms. To support effective early warning systems and FLF mitigation, developing an approach that can maintain accuracy under such conditions becomes highly crucial.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Histogram Equalization (HE)</title>
<p>Histogram Equalization (HE) is an image enhancement technique that improves visual quality by equalizing contrast through the redistribution of pixel intensity values. It has been successfully applied in medical and forestry imaging, particularly in improving Signal-to-Noise Ratio (SNR) metrics [<xref ref-type="bibr" rid="ref-11">11</xref>,<xref ref-type="bibr" rid="ref-17">17</xref>]. However, HE has limitations in scenes with uneven lighting, where global adjustments may fail to enhance local details.</p>
<p>To overcome this, Adaptive Histogram Equalization (AHE) was introduced, operating locally on sub-regions of the image. Yet, AHE often excessively amplifies noise. CLAHE (Contrast Limited Adaptive Histogram Equalization) improves upon AHE by applying a clip limit to the histogram, thus preserving visual stability while enhancing local contrast [<xref ref-type="bibr" rid="ref-18">18</xref>]. CLAHE is particularly useful in handling varying illumination levels and textured backgrounds, making it more suitable for complex forest scenes compared to traditional HE or Retinex-based methods.</p>
<p>Recent studies have also explored hybrid techniques such as Fuzzy Contrast Enhancement (FCE) and learning-based histogram models to further improve detail visibility in low-resolution or noisy images [<xref ref-type="bibr" rid="ref-19">19</xref>]. Applications in plant disease detection have demonstrated that HE and CLAHE can improve classification accuracy by clarifying subtle patterns and textures during preprocessing [<xref ref-type="bibr" rid="ref-20">20</xref>,<xref ref-type="bibr" rid="ref-21">21</xref>].</p>
<p>To address dynamic scene complexity and preserve fine details, DBST-LCM (Dynamic Block Size Technique-Local Contrast Modification) was developed. This method adapts enhancement parameters by dynamically selecting block sizes based on image features and applying localized adjustments. It then performs CLAHE, followed by a feedback-driven quality check to ensure contrast clarity and sharpness in complex backgrounds [<xref ref-type="bibr" rid="ref-13">13</xref>]. Unlike conventional methods, DBST-LCM provides both adaptability and structure-aware enhancement, making it especially effective for detecting subtle smoke or fire signatures in challenging FLF conditions.</p>
<p>These three techniques&#x2014;HE, CLAHE, and DBST-LCM&#x2014;were chosen for this study due to their progressive improvements in enhancing visual cues critical for fire and smoke detection under varying illumination and environmental conditions. Their performance will be evaluated comprehensively to determine their suitability for real-time FLF detection.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>YOLOv11</title>
<p>YOLOv11 is the latest evolution in the YOLO (You Only Look Once) architecture series, developed to improve real-time object detection by enhancing both accuracy and computational efficiency. Key innovations in this version include the C3K2 block and the C2PSA attention module, which are designed to optimize feature extraction and attention to critical image regions. These improvements allow YOLOv11 to achieve better inference speed and precision compared to its predecessors&#x2014;YOLOv8, YOLOv9, and YOLOv10&#x2014;without significantly increasing model complexity. In this research, the official open-source implementation of YOLOv11 provided by Ultralytics (available at <ext-link ext-link-type="uri" xlink:href="https://docs.ultralytics.com/models/yolo11/">https://docs.ultralytics.com/models/yolo11/</ext-link>, accessed on 17 June 2024) has been adopted to ensure reproducibility and consistency with the original architecture.</p>
<p>Here are the key features of YOLOv11: (1) Efficient Feature Extraction with the C3K2 Block: The C3K2 block is an enhancement of the Cross Stage Partial (CSP) architecture used in previous versions. By using two small convolutions (3 &#x00D7; 3 kernel) instead of one large convolution, the C3K2 block maintains feature extraction performance while reducing the number of parameters and computational load [<xref ref-type="bibr" rid="ref-22">22</xref>]. (2) Improved Spatial Attention with C2PSA: C2PSA (Cross Stage Partial with Spatial Attention) is a new module that introduces a spatial attention mechanism to help the model focus more on major areas in the image, such as small objects or partially occluded objects. This improves the model&#x2019;s sensitivity to spatial variations in the image [<xref ref-type="bibr" rid="ref-23">23</xref>]. (3) Multi-Scale Feature Combination through SPPF: Like previous versions, YOLOv11 retains the Spatial Pyramid Pooling Fast (SPPF) module, which combines features from various scales to enhance the detection of both small and large objects [<xref ref-type="bibr" rid="ref-24">24</xref>]. (4) CBS Blocks for Inference Stability: YOLOv11 also uses a Convolution-BatchNorm-SiLU (CBS) arrangement in the head section to ensure stable and effective data flow, supporting more accurate bounding box prediction and classification [<xref ref-type="bibr" rid="ref-23">23</xref>].</p>
<p>As part of the Single Shot Detector (SSD) architecture family, YOLOv11 performs object detection in a single forward pass, eliminating the need for region proposal stages found in two-stage detectors like Faster R-CNN. This makes YOLOv11 highly efficient and well-suited for real-time applications, including wildfire detection, where rapid response is critical. Combined with its enhanced modules&#x2014;such as C3K2, C2PSA, and SPPF&#x2014;YOLOv11 achieves robust performance in detecting fire and smoke under challenging conditions like low light, varied object scales, and partial occlusion. These improvements make it a robust and practical solution for early forest and land fire monitoring systems, particularly when deployed in real-time surveillance setups using UAVs (drones) or edge devices in high-risk areas such as peatlands or remote conservation forests.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Methodology</title>
<sec id="s3_1">
<label>3.1</label>
<title>D-Fire Dataset</title>
<p>Focused on effective early detection of fire and smoke in real-time conditions, this experiment is based on real images representing fire and smoke events from various environmental conditions. These include scenarios with only fire, only smoke, a combination of both, and negative examples without fire or smoke but with visual elements that might be misinterpreted. Based on these four scenarios, Ven&#x00E2;ncio et al. [<xref ref-type="bibr" rid="ref-25">25</xref>] developed the D-Fire dataset, as outlined in <xref ref-type="table" rid="table-2">Table 2</xref>. D-Fire consists of 21,527 labeled images categorized accordingly. Although the fire category contains fewer images, it has a higher annotation density, with an average of 2.52 fire objects per image. In contrast, smoke objects in the smoke and fire-and-smoke categories have an average of 1.13 annotations per image. In total, the dataset includes 26,557 bounding boxes: 14,692 labeled as fire and 11,865 as smoke, as shown in <xref ref-type="table" rid="table-3">Table 3</xref>.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>The distribution of images per scenario category in the D-Fire dataset</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Scenario</th>
<th>Description of scenario</th>
<th>Image</th>
</tr>
</thead>
<tbody>
<tr>
<td>Fire</td>
<td>Images containing only fire</td>
<td>1164</td>
</tr>
<tr>
<td>Smoke</td>
<td>Images containing only smoke</td>
<td>5867</td>
</tr>
<tr>
<td>Fire and smoke</td>
<td>Images containing both fire and smoke</td>
<td>4658</td>
</tr>
<tr>
<td>None</td>
<td>Images containing neither fire nor smoke</td>
<td>9838</td>
</tr>
<tr>
<td></td>
<td>Total of images</td>
<td>21,527</td>
</tr>
</tbody>
</table>
</table-wrap><table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>Number of annotations and density per image in the D-Fire dataset</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Object category</th>
<th>Bounding box</th>
<th>Average per-image</th>
</tr>
</thead>
<tbody>
<tr>
<td>Fire</td>
<td>14,692</td>
<td>2.52</td>
</tr>
<tr>
<td>Smoke</td>
<td>11,865</td>
<td>1.13</td>
</tr>
<tr>
<td>Total</td>
<td>26,557</td>
<td></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To support variation, the images were collected from several sources, including internet searches, legal fire simulations at the Technological Park of Belo Horizonte (Brazil), surveillance camera footage from Universidade Federal de Minas Gerais (UFMG), and Serra Verde State Park [<xref ref-type="bibr" rid="ref-26">26</xref>]. Additionally, some synthetic images were generated using montage techniques by overlaying artificial smoke onto green landscape backgrounds with photo editing software to simulate real forest conditions. <xref ref-type="fig" rid="fig-1">Fig. 1</xref> visually represents the diversity within the dataset, highlighting different instances of fire and smoke along with their corresponding ground-truth labels. The dataset captures a broad spectrum of scene types&#x2014;such as forests, parks, and semi-urban environments&#x2014;as well as variations in camera angles, smoke density, and lighting conditions.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>D-Fire dataset instances</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-1.tif"/>
</fig>
<p>This diversity allows for a comprehensive evaluation of image enhancement methods, especially in challenging scenarios such as diffused smoke, reduced visibility, and fluctuating lighting conditions. As a result, D-Fire is particularly well-suited for testing contrast-based techniques aimed at enhancing feature clarity in complex visual environments.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Experiment Setting</title>
<p>The D-Fire dataset is initially divided into two parts with an 80:20 ratio for training and testing. We then combined these two folders before randomly splitting the dataset again into a 70:20:10 ratio for training, validating, and testing. Afterward, we applied 3 image enhancement methods and categorized our dataset into 3 groups: X Dataset, Y Dataset, and Z Dataset. X Dataset consists of images to which HE (Histogram Equalization) was applied; HE works by equalizing the pixel intensity distribution in the image to flatten the image contrast. The Y Dataset contains images to which CLAHE (Contrast Limited Adaptive HE) was applied. Improving upon HE, CLAHE divides the image into smaller blocks and limits contrast amplification to avoid excessive noise in those areas. Finally, the Z Dataset consists of images to which DBST-LCM CLAHE (Dynamic Block Size Technique-Local Contrast Modification) was applied, where, before CLAHE, a combination of noise reduction based on shift transformation (DBST) and local contrast modification (LCM) is applied. <xref ref-type="fig" rid="fig-2">Fig. 2</xref> shows examples of images that have undergone image enhancement with (a) HE, (b) CLAHE, and (c) DBST-LCM CLAHE.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>Examples of image enhancement methods with (<bold>a</bold>) HE, (<bold>b</bold>) CLAHE, and (<bold>c</bold>) DBST-LCM CLAHE</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-2.tif"/>
</fig>
<p>We implemented the YOLOv11x variant for forest and land fire detection, utilizing a configuration of depth_multiple &#x003D; 1.00, width_multiple &#x003D; 1.50, and max_channels &#x003D; 512. These values were not the result of empirical tuning but were adopted directly from the official Ultralytics YOLOv11 model configuration as the standard settings for the YOLOv11x variant. This ensures consistency with the original reference implementation and benchmark results. The model processes 640 &#x00D7; 640 &#x00D7; 3 input images through a deep convolutional pipeline. It features down-sampling stages via alternating Conv and C3 modules, which progressively reduce spatial resolution while increasing feature depth. The SPPF block aggregates multi-scale context before passing features to the neck, where they are upsampled and concatenated to enhance semantic richness. Final detection heads operate at resolutions of 80 &#x00D7; 80, 40 &#x00D7; 40, and 20 &#x00D7; 20 to detect small, medium, and large-scale objects, respectively. The architecture&#x2019;s combination of C3K2, SPPF, and C2PSA modules improves feature extraction efficiency and attention precision, capabilities particularly valuable for detecting subtle visual cues such as smoke. <xref ref-type="fig" rid="fig-3">Fig. 3</xref> illustrates the research workflow that is based on YOLOv11 architecture with customized depth, width, and max channels [<xref ref-type="bibr" rid="ref-22">22</xref>].</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>Research workflow</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-3.tif"/>
</fig>
<p>Model training was conducted on Google Colaboratory using an NVIDIA A100-SXM4-40GB GPU. The training process spanned 100 epochs with a batch size of 6 and an input resolution of 640 &#x00D7; 640 pixels. The selection of these hyperparameters was based on hardware availability and training stability considerations, in line with practices found in related studies [<xref ref-type="bibr" rid="ref-25">25</xref>].</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Training Result</title>
<p><xref ref-type="table" rid="table-4">Table 4</xref> shows the results of training the YOLOv11x model on the D-Fire dataset, which has been processed using three different image enhancement techniques: Histogram Equalization (HE), CLAHE, and DBST-LCM CLAHE. During the training process, X Dataset (HE) achieved the best performance with an mAP50 score of 0.771, followed by Z Dataset (DBST-LCM CLAHE) with 0.770, and Y Dataset (CLAHE) with 0.759. These results provide an initial indication that the HE method offers strong support in the object detection model training process, even when compared to more complex methods like DBST-LCM CLAHE.</p>
<table-wrap id="table-4">
<label>Table 4</label>
<caption>
<title>Training results of the D-Fire dataset with YOLOv11x</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Class</th>
<th>Images</th>
<th>Instances</th>
<th colspan="3">HE (X Dataset)</th>
<th colspan="3">CLAHE (Y Dataset)</th>
<th align="center" colspan="3">DBST-LCM CLAHE (Z Dataset)</th>
</tr>
<tr>
<th></th>
<th></th>
<th></th>
<th>P</th>
<th>R</th>
<th>mAP50</th>
<th>P</th>
<th>R</th>
<th>mAP50</th>
<th>P</th>
<th>R</th>
<th>mAP50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Smoke</td>
<td>2127</td>
<td>2419</td>
<td>0.834</td>
<td>0.791</td>
<td>0.84</td>
<td>0.822</td>
<td>0.79</td>
<td>0.833</td>
<td>0.839</td>
<td>0.788</td>
<td>0.846</td>
</tr>
<tr>
<td>Fire</td>
<td>1167</td>
<td>2993</td>
<td>0.735</td>
<td>0.615</td>
<td>0.703</td>
<td>0.704</td>
<td>0.6</td>
<td>0.685</td>
<td>0.741</td>
<td>0.605</td>
<td>0.694</td>
</tr>
<tr>
<td>All</td>
<td>4305</td>
<td>5412</td>
<td>0.784</td>
<td>0.703</td>
<td>0.771</td>
<td>0.763</td>
<td>0.695</td>
<td>0.759</td>
<td>0.79</td>
<td>0.697</td>
<td>0.77</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The evaluation of detection results is based on the Intersection over Union (IoU) value, which calculates the overlap ratio between the predicted box and the ground truth box. The formula can be seen in <xref ref-type="disp-formula" rid="eqn-1">Eq. (1)</xref>:
<disp-formula id="eqn-1"><label>(1)</label><mml:math id="mml-eqn-1" display="block"><mml:mi>I</mml:mi><mml:mi>o</mml:mi><mml:mi>U</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2229;</mml:mo><mml:mi>A</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>g</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>A</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x222A;</mml:mo><mml:mi>A</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>g</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>The prediction outputs are then classified into True Positive (TP), False Positive (FP), and False Negative (FN). This classification serves as the basis for calculating Precision and Recall, as shown in <xref ref-type="disp-formula" rid="eqn-2">Eqs. (2)</xref> and <xref ref-type="disp-formula" rid="eqn-3">(3)</xref> [<xref ref-type="bibr" rid="ref-27">27</xref>]. Another evaluation indicator is the F1 Score, which is the harmonic mean of Precision and Recall, as shown in <xref ref-type="disp-formula" rid="eqn-4">Eq. (4)</xref>.
<disp-formula id="eqn-2"><label>(2)</label><mml:math id="mml-eqn-2" display="block"><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd></mml:mtd><mml:mtd><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mspace width="thinmathspace"></mml:mspace><mml:mspace width="thinmathspace"></mml:mspace><mml:mo stretchy="false">(</mml:mo><mml:mi>P</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="eqn-3"><label>(3)</label><mml:math id="mml-eqn-3" display="block"><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd></mml:mtd><mml:mtd><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"></mml:mspace><mml:mspace width="thinmathspace"></mml:mspace><mml:mo stretchy="false">(</mml:mo><mml:mi>R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p>Based on the training results on the X, Y, and Z datasets, the model showed consistent detection performance, especially for the Smoke class. The highest mAP50 score for this class was achieved with the Z Dataset, reaching 0.846. In terms of Precision and Recall, the Smoke class in the Z Dataset recorded the highest Precision (0.839), while the highest Recall (0.791) was obtained from the X Dataset. On the other hand, for the Fire class, the highest mAP50 score was achieved with X Dataset (0.703), with the highest Precision (0.741) recorded in Z Dataset and the highest Recall (0.615) in X Dataset. Another evaluation index, F1, is shown in <xref ref-type="disp-formula" rid="eqn-4">Eq. (4)</xref>.
<disp-formula id="eqn-4"><label>(4)</label><mml:math id="mml-eqn-4" display="block"><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>+</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>The training processes for each enhanced dataset are visualized in <xref ref-type="fig" rid="fig-4">Fig. 4</xref> (X Dataset for HE), <xref ref-type="fig" rid="fig-5">Fig. 5</xref> (Y Dataset for CLAHE), and <xref ref-type="fig" rid="fig-6">Fig. 6</xref> (Z Dataset for DBST-LCM CLAHE). Across more than 30 epochs, the loss values for all datasets consistently declined, indicating successful convergence of the model parameters. Throughout training, the key components of the YOLOv11x loss function&#x2014;namely, box loss (localization error), classification loss, and distribution focal loss (bounding box refinement)&#x2014;exhibited a steady downward trend, reflecting stable and effective optimization during training. These loss components correspond to the model&#x2019;s efforts in improving bounding box localization, object confidence prediction, and class classification performance, which are essential for accurate fire and smoke detection in various environmental conditions.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>Training process YOLOv11x with X Dataset (HE)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-4.tif"/>
</fig><fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>Training process YOLOv11x with Y Dataset (CLAHE)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-5.tif"/>
</fig><fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>Training process YOLOv11x with Z Dataset (DBST-LCM CLAHE)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-6.tif"/>
</fig>
<p>While the overall trends were similar, minor differences in the loss values were observed across the three datasets. The model trained with the Z Dataset showed slightly lower overall loss values, particularly in the validation box and classification losses, suggesting better optimization. The Y Dataset achieved moderate loss reductions, whereas the X Dataset maintained slightly higher losses throughout training. These differences highlight the potential benefits of advanced image enhancement techniques in improving model performance.
<disp-formula id="eqn-5"><label>(5)</label><mml:math id="mml-eqn-5" display="block"><mml:mtable columnalign="right left right left right left right left right left right left" rowspacing="3pt" columnspacing="0em 2em 0em 2em 0em 2em 0em 2em 0em 2em 0em" displaystyle="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>&#x02112;</mml:mi></mml:mrow><mml:mo>=</mml:mo></mml:mtd><mml:mtd><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:msup><mml:mi>S</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:munderover><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>&#x0131;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>[</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:mrow><mml:mo 
stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd></mml:mtd><mml:mtd><mml:mi></mml:mi><mml:mspace width="1em"></mml:mspace><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:msup><mml:mi>S</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:munderover><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>&#x0131;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>[</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msqrt><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:msqrt><mml:mo>&#x2212;</mml:mo><mml:msqrt><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>w</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:msqrt><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msqrt><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:msqrt><mml:mo>&#x2212;</mml:mo><mml:msqrt><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:mrow><mml:mo 
stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:msqrt><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>]</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:msup><mml:mi>S</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:munderover><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>&#x0131;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mtd></mml:mtr><mml:mtr><mml:mtd></mml:mtd><mml:mtd><mml:mi></mml:mi><mml:mspace width="1em"></mml:mspace><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:msup><mml:mi>S</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:munderover><mml:munderover><mml:mo 
movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>B</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>&#x0131;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:msup><mml:mi>S</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:munderover><mml:msubsup><mml:mi>&#x0131;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msubsup><mml:munder><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mi>&#x03B5;</mml:mi><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:munder><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:mrow><mml:mo 
stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>c</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p>As described in <xref ref-type="disp-formula" rid="eqn-5">Eq. (5)</xref>, the YOLOv11x model adopts a loss <inline-formula id="ieqn-1"><mml:math id="mml-ieqn-1"><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>&#x02112;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> function structure based on the basic YOLO architecture. This loss function minimizes errors in predicting bounding box parameters <inline-formula id="ieqn-2"><mml:math id="mml-ieqn-2"><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>, object confidence <inline-formula id="ieqn-3"><mml:math id="mml-ieqn-3"><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:math></inline-formula>, and class probabilities <inline-formula id="ieqn-4"><mml:math id="mml-ieqn-4"><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>c</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> [<xref ref-type="bibr" rid="ref-27">27</xref>]. 
Here, the symbols <inline-formula id="ieqn-5"><mml:math id="mml-ieqn-5"><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>w</mml:mi></mml:mrow></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover><mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> represent the predicted values, while the corresponding ground truth labels are denoted without a hat. The indicator <inline-formula id="ieqn-6"><mml:math id="mml-ieqn-6"><mml:msubsup><mml:mi>&#x0131;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> identifies whether an object exists within a cell <inline-formula id="ieqn-7"><mml:math id="mml-ieqn-7"><mml:mi>i</mml:mi></mml:math></inline-formula> and whether the <inline-formula id="ieqn-8"><mml:math id="mml-ieqn-8"><mml:mi>j</mml:mi></mml:math></inline-formula>-th bounding box predictor is responsible for that detection. 
In the training configuration, the hyperparameter <inline-formula id="ieqn-9"><mml:math id="mml-ieqn-9"><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> was set to 0.5, assigning moderate weight to localization errors. Similarly, <inline-formula id="ieqn-10"><mml:math id="mml-ieqn-10"><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> was set to 0.5 to reduce the impact of prediction in grid cells without objects, preventing distraction from large background regions.</p>
<p>YOLOv11x introduces significant advancements in real-time object detection, emphasizing both efficiency and precision. Its architecture integrates modules such as the C3K2 block for lightweight feature extraction and the C2PSA module for enhanced spatial attention. During training, YOLOv11x employed a combination of data augmentation techniques, including HSV augmentation, random rotation, translation, perspective transformation, scaling, and both vertical and horizontal flipping. Additionally, advanced augmentations such as Mosaic and MixUp were applied to improve generalization across diverse environmental conditions, with optional use of CutMix further enhancing training robustness.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Results and Discussions</title>
<p>To visualize the performance of the YOLOv11x model across different image enhancement techniques, <xref ref-type="fig" rid="fig-7">Figs. 7</xref>&#x2013;<xref ref-type="fig" rid="fig-9">9</xref> present the validation results for datasets enhanced using Histogram Equalization (HE), Contrast Limited Adaptive Histogram Equalization (CLAHE), and DBST-LCM CLAHE, respectively. These figures demonstrate the model&#x2019;s ability to detect fire and smoke under various visual conditions. The HE-enhanced dataset (<xref ref-type="fig" rid="fig-7">Fig. 7</xref>) shows consistent detection, but with relatively moderate confidence scores. In contrast, the CLAHE-enhanced dataset (<xref ref-type="fig" rid="fig-8">Fig. 8</xref>) reveals slightly improved detection clarity in several images but also includes some lower confidence values and misidentifications. The DBST-LCM CLAHE dataset (<xref ref-type="fig" rid="fig-9">Fig. 9</xref>) displays the highest consistency in detection with generally stronger confidence levels, especially for both smoke and fire, indicating better contrast and feature enhancement.</p>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>Validation Batch 0 with YOLOv11x and X Dataset (HE)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-7.tif"/>
</fig><fig id="fig-8">
<label>Figure 8</label>
<caption>
<title>Validation Batch 0 with YOLOv11x and Y Dataset (CLAHE)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-8.tif"/>
</fig><fig id="fig-9">
<label>Figure 9</label>
<caption>
<title>Validation Batch 0 with YOLOv11x and Z Dataset (DBST-LCM CLAHE)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-9.tif"/>
</fig>
<p>To further support this observation, <xref ref-type="fig" rid="fig-10">Fig. 10</xref> presents key evaluation metrics for the DBST-LCM CLAHE-enhanced dataset. The precision-recall (PR) curve shows high precision values for smoke (0.864) and fire (0.694), contributing to a mean Average Precision at 50% Intersection over Union (mAP@50) of 0.770 across all classes. The F1 curve peaks at 0.74 at a confidence threshold of 0.353, indicating a good balance between precision and recall. The confusion matrix supports this finding, with 2016 out of 2047 smoke instances correctly identified (98.49% true positive rate) and 2071 out of 2179 fire instances accurately detected (95.04%). Although 905 background instances were misclassified as fire, the primary object classes remained distinguishable. These results highlight the capability of DBST-LCM CLAHE in improving both detection accuracy and object localization, especially under challenging visual conditions involving dispersed smoke.</p>
<fig id="fig-10">
<label>Figure 10</label>
<caption>
<title>Performance of DBST-LCM CLAHE based on precision-recall (PR) curves (<bold>a</bold>), per-class F1-scores (<bold>b</bold>), and confusion matrix (<bold>c</bold>)</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-10.tif"/>
</fig>
<p><xref ref-type="table" rid="table-5">Table 5</xref> presents the validation or testing results of the YOLOv11x model on the three image-enhanced datasets. Z Dataset achieved the highest Precision score of 0.79, indicating its strong ability to correctly identify fire and smoke with fewer false positives. Meanwhile, X Dataset maintained the highest mAP50 score of 0.771 and the highest Recall at 0.703, showing better overall detection accuracy and generalization. This suggests that while DBST-LCM CLAHE (applied on Z Dataset) is highly precise, HE (applied on X Dataset) remains more balanced and effective in capturing a broader range of fire and smoke instances, especially in varied or low-light conditions. CLAHE, although beneficial for enhancing local contrast, showed slightly lower performance in comparison to the other two techniques.</p>
<table-wrap id="table-5">
<label>Table 5</label>
<caption>
<title>Validating the result of the D-Fire dataset with YOLOv11x</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Class</th>
<th>Images</th>
<th>Instances</th>
<th colspan="3">HE (X Dataset)</th>
<th colspan="3">CLAHE (Y Dataset)</th>
<th align="center" colspan="3">DBST-LCM CLAHE (Z Dataset)</th>
</tr>
<tr>
<th></th>
<th></th>
<th></th>
<th>P</th>
<th>R</th>
<th>mAP50</th>
<th>P</th>
<th>R</th>
<th>mAP50</th>
<th>P</th>
<th>R</th>
<th>mAP50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Smoke</td>
<td>2127</td>
<td>2419</td>
<td>0.834</td>
<td>0.791</td>
<td>0.84</td>
<td>0.821</td>
<td>0.791</td>
<td>0.833</td>
<td>0.839</td>
<td>0.788</td>
<td>0.846</td>
</tr>
<tr>
<td>Fire</td>
<td>1167</td>
<td>2993</td>
<td>0.735</td>
<td>0.615</td>
<td>0.703</td>
<td>0.706</td>
<td>0.601</td>
<td>0.685</td>
<td>0.741</td>
<td>0.605</td>
<td>0.694</td>
</tr>
<tr>
<td>All</td>
<td>4305</td>
<td>5412</td>
<td>0.784</td>
<td>0.703</td>
<td>0.771</td>
<td>0.763</td>
<td>0.695</td>
<td>0.759</td>
<td>0.79</td>
<td>0.697</td>
<td>0.77</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In addition, the detection performance of YOLOv11x on each dataset is visualized in <xref ref-type="fig" rid="fig-11">Fig. 11</xref>. When comparing the different image enhancement techniques, DBST-LCM CLAHE provides better object localization for complex smoke features, especially under varying lighting and contrast conditions. This is noticeable in the second and third rows of predictions, where smoke areas are more comprehensively mapped. However, in some cases, such as those shown in the first column of <xref ref-type="fig" rid="fig-11">Fig. 11</xref>, X Dataset allows the model to detect fire and smoke more completely and with higher confidence scores, likely due to the global contrast enhancement provided by HE.</p>
<fig id="fig-11">
<label>Figure 11</label>
<caption>
<title>Recognition results with YOLOv11x</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_67381-fig-11.tif"/>
</fig>
<p>Each image enhancement method contributes uniquely to the detection process. (1) Histogram Equalization (HE) improves global contrast, making image features easier to recognize in dark or low-light areas. (2) CLAHE enhances local contrast and preserves excellent details, supporting more precise detection in regions with small intensity variations. (3) DBST-LCM CLAHE, as a hybrid method, balances both local and global contrast enhancement, making it effective for handling complex scenarios such as smoke dispersion. (4) Additionally, the DBST preprocessing helps suppress background interference, while the LCM operation helps maintain object structure. (5) Although more computationally complex, the contribution of DBST-LCM CLAHE, particularly its highest precision score, demonstrates its strength in reducing false positives and improving detection robustness under real-world conditions.</p>
<p>Overall, all three enhancement techniques have proven to improve detection quality in diverse ways. Histogram Equalization remains an efficient and practical approach with the highest overall detection score. However, for applications that require high sensitivity and precision, such as early smoke detection, DBST-LCM CLAHE shows promising results and potential for further development.</p>
</sec>
<sec id="s5">
<label>5</label>
<title>Conclusions</title>
<p>This study aimed to empirically evaluate the effectiveness of three image enhancement techniques&#x2014;Histogram Equalization (HE), Contrast Limited Adaptive Histogram Equalization (CLAHE), and DBST-LCM CLAHE&#x2014;on the object detection performance of the YOLOv11x model for early Forest and Land Fire (FLF) detection. Using the D-Fire dataset, which includes over 21,000 annotated images representing varied times and weather conditions, the model was trained and tested to reflect realistic environmental challenges.</p>
<p>Results show that all three enhancement techniques positively impact detection accuracy, especially in low-illumination and high-noise scenarios. HE demonstrated the highest mean Average Precision at 50% IoU (mAP50) of 0.771, along with a balanced performance in Precision (0.784) and Recall (0.703), indicating strong generalization across fire and smoke cases. In contrast, DBST-LCM CLAHE achieved the highest Precision score (0.790), reducing false positives and demonstrating superior robustness in complex scenes, such as dispersed smoke. CLAHE performed slightly lower but remained valuable in enhancing local feature details.</p>
<p>The detection visualizations further confirm that each enhancement method contributes uniquely: HE improves global contrast, aiding overall detection; CLAHE accentuates local details; while DBST-LCM CLAHE successfully combines both strategies, with added benefits from DBST in noise suppression and LCM in structure preservation. These differences suggest that the enhancement method can be selected based on detection priority&#x2014;completeness, precision, or clarity in complex backgrounds.</p>
<p>Importantly, this study contributes not by proposing new architectures but by benchmarking YOLOv11x&#x2014;one of the most recent lightweight detectors&#x2014;under realistic FLF conditions enhanced by proven preprocessing strategies. The inclusion of diverse evaluation metrics such as F1-score, precision-recall curves, and qualitative assessments (e.g., bounding box visualizations) strengthens the credibility of these findings.</p>
<p>For future development, one valuable direction would be the deployment of the system on edge computing platforms, evaluating real-time inference speed, power consumption, and detection latency. Additionally, training a localized model using region-specific data, such as forest imagery from Indonesia, could further improve detection accuracy by incorporating native vegetation types, fire patterns, and regional atmospheric conditions.</p>
<p>Ultimately, this research provides practical insights into how advanced image enhancement techniques, when combined with modern detectors like YOLOv11x, can deliver high-performance FLF detection systems suited for early warning applications in real-world environments.</p>
</sec>
</body>
<back>
<ack>
<p>The authors thank the Vice-Rector of Research, Innovation, and Entrepreneurship at Satya Wacana Christian University.</p>
</ack>
<sec>
<title>Funding Statement</title>
<p>This research was funded by the Directorate of Research, Technology, and Community Service, Ministry of Higher Education, Science, and Technology of the Republic of Indonesia under the Regular Fundamental Research scheme, with grant numbers 001/LL6/PL/AL.04/2025, 011/SPK-PFR/RIK/05/2025.</p>
</sec>
<sec>
<title>Author Contributions</title>
<p>The authors confirm contribution to the paper as follows: study conception and design: Christine Dewi; Melati Viaeritas Vitrieco Santoso; Abbott Po Shun Chen; Hanna Prillysca Chernovita; data collection: Melati Viaeritas Vitrieco Santoso; Stephen Abednego Philemon; analysis and interpretation of results: Evangs Mailoa; Christine Dewi; Hanna Prillysca Chernovita; Abbott Po Shun Chen; draft manuscript preparation: Abbott Po Shun Chen; Christine Dewi; Hanna Prillysca Chernovita; Evangs Mailoa; Stephen Abednego Philemon. All authors reviewed the results and approved the final version of the manuscript.</p>
</sec>
<sec sec-type="data-availability">
<title>Availability of Data and Materials</title>
<p>D-Fire: an image dataset for fire and smoke detection (<ext-link ext-link-type="uri" xlink:href="https://github.com/gaiasd/DFireDataset">https://github.com/gaiasd/DFireDataset</ext-link>, accessed on 1 December 2024).</p>
</sec>
<sec>
<title>Ethics Approval</title>
<p>Not applicable.</p>
</sec>
<sec sec-type="COI-statement">
<title>Conflicts of Interest</title>
<p>The authors declare no conflicts of interest to report regarding the present study.</p>
</sec>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Akter</surname> <given-names>S</given-names></string-name>, <string-name><surname>Grafton</surname> <given-names>RQ</given-names></string-name></person-group>. <article-title>Do fires discriminate? Socio-economic disadvantage, wildfire hazard exposure and the Australian 2019&#x2013;20 &#x2018;Black Summer&#x2019; fires</article-title>. <source>Clim Change</source>. <year>2021</year>;<volume>165</volume>(<issue>3</issue>):<fpage>53</fpage>. doi:<pub-id pub-id-type="doi">10.1007/s10584-021-03064-6</pub-id>.</mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Gon&#x00E7;alves</surname> <given-names>LAO</given-names></string-name>, <string-name><surname>Ghali</surname> <given-names>R</given-names></string-name>, <string-name><surname>Akhloufi</surname> <given-names>MA</given-names></string-name></person-group>. <article-title>YOLO-based models for smoke and wildfire detection in ground and aerial images</article-title>. <source>Fire</source>. <year>2024</year>;<volume>7</volume>(<issue>4</issue>):<fpage>140</fpage>. doi:<pub-id pub-id-type="doi">10.3390/fire7040140</pub-id>.</mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wicaksono</surname> <given-names>P</given-names></string-name></person-group>. <article-title>Deep learning wildfire detection to increase fire safety with YOLOv8</article-title>. <source>Int J Intell Syst Appl Eng</source>. <year>2024</year>;<volume>12</volume>(<issue>3</issue>):<fpage>4383</fpage>.</mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Alkhammash</surname> <given-names>EH</given-names></string-name></person-group>. <article-title>A comparative analysis of YOLOv9, YOLOv10, YOLOv11 for smoke and fire detection</article-title>. <source>Fire</source>. <year>2025</year>;<volume>8</volume>(<issue>1</issue>):<fpage>26</fpage>. doi:<pub-id pub-id-type="doi">10.3390/fire8010026</pub-id>.</mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>de Ven&#x00E2;ncio</surname> <given-names>PVAB</given-names></string-name>, <string-name><surname>Lisboa</surname> <given-names>AC</given-names></string-name>, <string-name><surname>Barbosa</surname> <given-names>AV</given-names></string-name></person-group>. <article-title>An automatic fire detection system based on deep convolutional neural networks for low-power, resource-constrained devices</article-title>. <source>Neural Comput Appl</source>. <year>2022</year>;<volume>34</volume>(<issue>18</issue>):<fpage>15349</fpage>&#x2013;<lpage>68</lpage>. doi:<pub-id pub-id-type="doi">10.1007/s00521-022-07467-z</pub-id>.</mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Pan</surname> <given-names>J</given-names></string-name>, <string-name><surname>Ou</surname> <given-names>X</given-names></string-name>, <string-name><surname>Xu</surname> <given-names>L</given-names></string-name></person-group>. <article-title>A collaborative region detection and grading framework for forest fire smoke using weakly supervised fine segmentation and lightweight faster-RCNN</article-title>. <source>Forests</source>. <year>2021</year>;<volume>12</volume>(<issue>6</issue>):<fpage>768</fpage>. doi:<pub-id pub-id-type="doi">10.3390/f12060768</pub-id>.</mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Perrolas</surname> <given-names>G</given-names></string-name>, <string-name><surname>Niknejad</surname> <given-names>M</given-names></string-name>, <string-name><surname>Ribeiro</surname> <given-names>R</given-names></string-name>, <string-name><surname>Bernardino</surname> <given-names>A</given-names></string-name></person-group>. <article-title>Scalable fire and smoke segmentation from aerial images using convolutional neural networks and quad-tree search</article-title>. <source>Sensors</source>. <year>2022</year>;<volume>22</volume>(<issue>5</issue>):<fpage>1701</fpage>. doi:<pub-id pub-id-type="doi">10.3390/s22051701</pub-id>; <pub-id pub-id-type="pmid">35270848</pub-id></mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Li</surname> <given-names>T</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>C</given-names></string-name>, <string-name><surname>Zhu</surname> <given-names>H</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>J</given-names></string-name></person-group>. <article-title>Adversarial fusion network for forest fire smoke detection</article-title>. <source>Forests</source>. <year>2022</year>;<volume>13</volume>(<issue>3</issue>):<fpage>366</fpage>. doi:<pub-id pub-id-type="doi">10.3390/f13030366</pub-id>.</mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Li</surname> <given-names>C</given-names></string-name>, <string-name><surname>Du</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>X</given-names></string-name>, <string-name><surname>Wu</surname> <given-names>P</given-names></string-name></person-group>. <article-title>YOLOGX: an improved forest fire detection algorithm based on YOLOv8</article-title>. <source>Front Environ Sci</source>. <year>2025</year>;<volume>12</volume>:<fpage>1486212</fpage>. doi:<pub-id pub-id-type="doi">10.3389/fenvs.2024.1486212</pub-id>.</mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Muksimova</surname> <given-names>S</given-names></string-name>, <string-name><surname>Umirzakova</surname> <given-names>S</given-names></string-name>, <string-name><surname>Mardieva</surname> <given-names>S</given-names></string-name>, <string-name><surname>Abdullaev</surname> <given-names>M</given-names></string-name>, <string-name><surname>Cho</surname> <given-names>YI</given-names></string-name></person-group>. <article-title>Revolutionizing wildfire detection through UAV-driven fire monitoring with a transformer-based approach</article-title>. <source>Fire</source>. <year>2024</year>;<volume>7</volume>(<issue>12</issue>):<fpage>443</fpage>. doi:<pub-id pub-id-type="doi">10.3390/fire7120443</pub-id>.</mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Farhan</surname> <given-names>MS</given-names></string-name>, <string-name><surname>Sthevanie</surname> <given-names>F</given-names></string-name>, <string-name><surname>Ramadhani</surname> <given-names>KN</given-names></string-name></person-group>. <article-title>Video based fire detection method using CNN and YOLO Version 4</article-title>. <source>Indones J Comput</source>. <year>2022</year>;<volume>7</volume>(<issue>2</issue>):<fpage>65</fpage>&#x2013;<lpage>78</lpage>. doi:<pub-id pub-id-type="doi">10.34818/INDOJC.2022.7.2.654</pub-id>.</mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ayumi</surname> <given-names>V</given-names></string-name>, <string-name><surname>Noprisson</surname> <given-names>H</given-names></string-name>, <string-name><surname>Ani</surname> <given-names>N</given-names></string-name></person-group>. <article-title>Forest fire detection using transfer learning model with contrast enhancement and data augmentation</article-title>. <source>J Nas Pendidik Teknik Inform</source>. <year>2024</year>;<volume>13</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>10</lpage>. doi:<pub-id pub-id-type="doi">10.23887/janapati.v13i1.75692</pub-id>.</mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Chakraverti</surname> <given-names>S</given-names></string-name>, <string-name><surname>Agarwal</surname> <given-names>P</given-names></string-name>, <string-name><surname>Pattanayak</surname> <given-names>HS</given-names></string-name>, <string-name><surname>Chauhan</surname> <given-names>SPS</given-names></string-name>, <string-name><surname>Chakraverti</surname> <given-names>AK</given-names></string-name>, <string-name><surname>Kumar</surname> <given-names>M</given-names></string-name></person-group>. <article-title>De-noising the image using DBST-LCM-CLAHE: a deep learning approach</article-title>. <source>Multimed Tools Appl</source>. <year>2024</year>;<volume>83</volume>(<issue>4</issue>):<fpage>11017</fpage>&#x2013;<lpage>42</lpage>. doi:<pub-id pub-id-type="doi">10.1007/s11042-023-16016-2</pub-id>.</mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Mowla</surname> <given-names>MN</given-names></string-name>, <string-name><surname>Asadi</surname> <given-names>D</given-names></string-name>, <string-name><surname>Tekeoglu</surname> <given-names>KN</given-names></string-name>, <string-name><surname>Masum</surname> <given-names>S</given-names></string-name>, <string-name><surname>Rabie</surname> <given-names>K</given-names></string-name></person-group>. <article-title>UAVs-FFDB: a high-resolution dataset for advancing forest fire detection and monitoring using unmanned aerial vehicles (UAVs)</article-title>. <source>Data Brief</source>. <year>2024</year>;<volume>55</volume>(<issue>1</issue>):<fpage>110706</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.dib.2024.110706</pub-id>; <pub-id pub-id-type="pmid">39076831</pub-id></mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Sitanggang</surname> <given-names>IS</given-names></string-name>, <string-name><surname>Syaufina</surname> <given-names>L</given-names></string-name>, <string-name><surname>Trisminingsih</surname> <given-names>R</given-names></string-name>, <string-name><surname>Ramdhany</surname> <given-names>D</given-names></string-name>, <string-name><surname>Nuradi</surname> <given-names>E</given-names></string-name>, <string-name><surname>Hidayat</surname> <given-names>MFA</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Indonesian forest and land fire prevention patrol system</article-title>. <source>Fire</source>. <year>2022</year>;<volume>5</volume>(<issue>5</issue>):<fpage>136</fpage>. doi:<pub-id pub-id-type="doi">10.3390/fire5050136</pub-id>.</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Saleh</surname> <given-names>A</given-names></string-name>, <string-name><surname>Zulkifley</surname> <given-names>MA</given-names></string-name>, <string-name><surname>Harun</surname> <given-names>HH</given-names></string-name>, <string-name><surname>Gaudreault</surname> <given-names>F</given-names></string-name>, <string-name><surname>Davison</surname> <given-names>I</given-names></string-name>, <string-name><surname>Spraggon</surname> <given-names>M</given-names></string-name></person-group>. <article-title>Forest fire surveillance systems: a review of deep learning methods</article-title>. <source>Heliyon</source>. <year>2024</year>;<volume>10</volume>(<issue>1</issue>):<fpage>e23127</fpage>. doi:<pub-id pub-id-type="doi">10.1016/j.heliyon.2023.e23127</pub-id>; <pub-id pub-id-type="pmid">38163175</pub-id></mixed-citation></ref>
<ref id="ref-17"><label>[17]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Winarno</surname> <given-names>G</given-names></string-name>, <string-name><surname>Irsal</surname> <given-names>M</given-names></string-name>, <string-name><surname>Karenina</surname> <given-names>CA</given-names></string-name>, <string-name><surname>Sari</surname> <given-names>G</given-names></string-name>, <string-name><surname>Hidayati</surname> <given-names>RN</given-names></string-name></person-group>. <article-title>Metode histogram equalization untuk peningkatan kualitas citra dengan menggunakan studi phantom lumbosacral</article-title>. <source>J Kesehat</source>. <year>2022</year>;<volume>7</volume>(<issue>2</issue>):<fpage>104</fpage>. doi:<pub-id pub-id-type="doi">10.22146/jkesvo.71469</pub-id>.</mixed-citation></ref>
<ref id="ref-18"><label>[18]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>H&#x00E4;rtinger</surname> <given-names>P</given-names></string-name>, <string-name><surname>Steger</surname> <given-names>C</given-names></string-name></person-group>. <article-title>Adaptive histogram equalization in constant time</article-title>. <source>J Real Time Image Process</source>. <year>2024</year>;<volume>21</volume>(<issue>3</issue>):<fpage>93</fpage>. doi:<pub-id pub-id-type="doi">10.1007/s11554-024-01465-1</pub-id>.</mixed-citation></ref>
<ref id="ref-19"><label>[19]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>Singh</surname> <given-names>P</given-names></string-name>, <string-name><surname>Ganotra</surname> <given-names>D</given-names></string-name></person-group>. <article-title>Histogram based resolution enhancement of an image by using artificial neural network</article-title>. In: <conf-name>Proceedings of the 2021 5th International Conference on Computing Methodologies and Communication (ICCMC); 2021 Apr 8&#x2013;10</conf-name>; <publisher-loc>Erode, India</publisher-loc>. doi:<pub-id pub-id-type="doi">10.1109/ICCMC51019.2021.9418295</pub-id>.</mixed-citation></ref>
<ref id="ref-20"><label>[20]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Sai</surname> <given-names>VT</given-names></string-name>, <string-name><surname>Sai Akhil</surname> <given-names>NE</given-names></string-name>, <string-name><surname>Jashnavi</surname> <given-names>TJM</given-names></string-name>, <string-name><surname>Kanakala</surname> <given-names>NVK</given-names></string-name></person-group>. <article-title>Image quality enhancement for wheat rust diseased leaf image using histogram equalization &#x0026; CLAHE</article-title>. <source>E3S Web Conf</source>. <year>2023</year>;<volume>391</volume>:<fpage>01029</fpage>. doi:<pub-id pub-id-type="doi">10.1051/e3sconf/202339101029</pub-id>.</mixed-citation></ref>
<ref id="ref-21"><label>[21]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Sayyid</surname> <given-names>MFN</given-names></string-name></person-group>. <article-title>Klasifikasi penyakit daun jagung menggunakan metode CNN dengan image processing HE Dan CLAHE</article-title>. <source>J Tek Inform Dan Teknol Inf</source>. <year>2024</year>;<volume>4</volume>(<issue>1</issue>):<fpage>86</fpage>&#x2013;<lpage>95</lpage>. <comment>(In Indonesian)</comment>. doi:<pub-id pub-id-type="doi">10.55606/jutiti.v4i1.3425</pub-id>.</mixed-citation></ref>
<ref id="ref-22"><label>[22]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Ghosh</surname> <given-names>A</given-names></string-name></person-group>. <article-title>YOLO11: redefining real-time object detection [Online]</article-title>. <comment>[cited 2025 Jun 17]</comment>. Available from: <ext-link ext-link-type="uri" xlink:href="https://learnopencv.com/yolo11/">https://learnopencv.com/yolo11/</ext-link>.</mixed-citation></ref>
<ref id="ref-23"><label>[23]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Khanam</surname> <given-names>R</given-names></string-name>, <string-name><surname>Hussain</surname> <given-names>M</given-names></string-name></person-group>. <article-title>YOLOv11: an overview of the key architectural enhancements</article-title>. <comment>arXiv:2410.17725v1. 2024</comment>.</mixed-citation></ref>
<ref id="ref-24"><label>[24]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Hidayatullah</surname> <given-names>P</given-names></string-name>, <string-name><surname>Syakrani</surname> <given-names>N</given-names></string-name>, <string-name><surname>Sholahuddin</surname> <given-names>MR</given-names></string-name>, <string-name><surname>Gelar</surname> <given-names>T</given-names></string-name>, <string-name><surname>Tubagus</surname> <given-names>R</given-names></string-name></person-group>. <article-title>YOLOv8 to YOLO11: a comprehensive architecture in-depth comparative review</article-title>. <comment>arXiv:2504.12345. 2025</comment>.</mixed-citation></ref>
<ref id="ref-25"><label>[25]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><surname>De Ven&#x00E2;ncio</surname> <given-names>PVAB</given-names></string-name>, <string-name><surname>Rezende</surname> <given-names>TM</given-names></string-name>, <string-name><surname>Lisboa</surname> <given-names>AC</given-names></string-name>, <string-name><surname>Barbosa</surname> <given-names>AV</given-names></string-name></person-group>. <article-title>Fire detection based on a two-dimensional convolutional neural network and temporal analysis</article-title>. In: <conf-name>Proceedings of the 2021 IEEE Latin American Conference on Computational Intelligence (LA-CCI); 2021 Nov 2&#x2013;4</conf-name>; <publisher-loc>Temuco, Chile</publisher-loc>. doi:<pub-id pub-id-type="doi">10.1109/LA-CCI48322.2021.9769824</pub-id>.</mixed-citation></ref>
<ref id="ref-26"><label>[26]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><surname>Boroujeni</surname> <given-names>SPH</given-names></string-name>, <string-name><surname>Mehrabi</surname> <given-names>N</given-names></string-name>, <string-name><surname>Afghah</surname> <given-names>F</given-names></string-name>, <string-name><surname>McGrath</surname> <given-names>CP</given-names></string-name>, <string-name><surname>Bhatkar</surname> <given-names>D</given-names></string-name>, <string-name><surname>Biradar</surname> <given-names>MA</given-names></string-name>, <etal>et al</etal></person-group>. <article-title>Fire and smoke datasets in 20 years: an in-depth review</article-title>. <comment>arXiv:2503.14552. 2025</comment>.</mixed-citation></ref>
<ref id="ref-27"><label>[27]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Terven</surname> <given-names>J</given-names></string-name>, <string-name><surname>Cordova-Esparza</surname> <given-names>DM</given-names></string-name>, <string-name><surname>Romero-Gonz&#x00E1;lez</surname> <given-names>JA</given-names></string-name>, <string-name><surname>Ram&#x00ED;rez-Pedraza</surname> <given-names>A</given-names></string-name>, <string-name><surname>Ch&#x00E1;vez-Urbiola</surname> <given-names>EA</given-names></string-name></person-group>. <article-title>A comprehensive survey of loss functions and metrics in deep learning</article-title>. <source>Artif Intell Rev</source>. <year>2025</year>;<volume>58</volume>(<issue>7</issue>):<fpage>195</fpage>. doi:<pub-id pub-id-type="doi">10.1007/s10462-025-11198-7</pub-id>.</mixed-citation></ref>
</ref-list>
</back></article>