<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xml:lang="en" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMC</journal-id>
<journal-id journal-id-type="nlm-ta">CMC</journal-id>
<journal-id journal-id-type="publisher-id">CMC</journal-id>
<journal-title-group>
<journal-title>Computers, Materials &#x0026; Continua</journal-title>
</journal-title-group>
<issn pub-type="epub">1546-2226</issn>
<issn pub-type="ppub">1546-2218</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">56823</article-id>
<article-id pub-id-type="doi">10.32604/cmc.2024.056823</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Improved IChOA-Based Reinforcement Learning for Secrecy Rate Optimization in Smart Grid Communications</article-title>
<alt-title alt-title-type="left-running-head">Improved IChOA-Based Reinforcement Learning for Secrecy Rate Optimization in Smart Grid Communications</alt-title>
<alt-title alt-title-type="right-running-head">Improved IChOA-Based Reinforcement Learning for Secrecy Rate Optimization in Smart Grid Communications</alt-title>
</title-group>
<contrib-group>
<contrib id="author-1" contrib-type="author">
<name name-style="western"><surname>Shoeibi</surname><given-names>Mehrdad</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western"><surname>Nevisi</surname><given-names>Mohammad Mehdi Sharifi</given-names></name><xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western"><surname>Khatami</surname><given-names>Sarvenaz Sadat</given-names></name><xref ref-type="aff" rid="aff-3">3</xref></contrib>
<contrib id="author-4" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Mart&#x00ED;n</surname><given-names>Diego</given-names></name><xref ref-type="aff" rid="aff-2">2</xref><email>diego.martin.andres@uva.es</email></contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western"><surname>Soltani</surname><given-names>Sepehr</given-names></name><xref ref-type="aff" rid="aff-4">4</xref></contrib>
<contrib id="author-6" contrib-type="author">
<name name-style="western"><surname>Aghakhani</surname><given-names>Sina</given-names></name><xref ref-type="aff" rid="aff-5">5</xref></contrib>
<aff id="aff-1"><label>1</label><institution>The WPI Business School, Worcester Polytechnic Institute</institution>, <addr-line>Worcester, MA 01609-2280</addr-line>, <country>USA</country></aff>
<aff id="aff-2"><label>2</label><institution>Department of Computer Science, Escuela de Ingenier&#x00ED;a Inform&#x00E1;tica de Segovia, Universidad de Valladolid</institution>, <addr-line>Segovia, 40005</addr-line>, <country>Spain</country></aff>
<aff id="aff-3"><label>3</label><institution>Department of Data Science Engineering, University of Houston</institution>, <addr-line>Houston, TX 77204</addr-line>, <country>USA</country></aff>
<aff id="aff-4"><label>4</label><institution>Department of Industrial Engineering, College of Engineering, University of Houston</institution>, <addr-line>Houston, TX 77204</addr-line>, <country>USA</country></aff>
<aff id="aff-5"><label>5</label><institution>Department of Industrial and Manufacturing Systems Engineering, Iowa State University</institution>, <addr-line>Ames, IA 50011</addr-line>, <country>USA</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Author: Diego Mart&#x00ED;n. Email: <email>diego.martin.andres@uva.es</email></corresp>
</author-notes>
<pub-date date-type="collection" publication-format="electronic">
<year>2024</year>
</pub-date>
<pub-date date-type="pub" publication-format="electronic">
<day>18</day><month>11</month><year>2024</year>
</pub-date>
<volume>81</volume>
<issue>2</issue>
<fpage>2819</fpage>
<lpage>2843</lpage>
<history>
<date date-type="received">
<day>31</day>
<month>7</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>9</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2024 The Authors.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Published by Tech Science Press.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMC_56823.pdf"></self-uri>
<abstract>
<p>In the evolving landscape of the smart grid (SG), the integration of non-orthogonal multiple access (NOMA) technology has emerged as a pivotal strategy for enhancing spectral efficiency and energy management. However, the open nature of wireless channels in SG raises significant concerns regarding the confidentiality of critical control messages, especially when broadcasted from a neighborhood gateway (NG) to smart meters (SMs). This paper introduces a novel approach based on reinforcement learning (RL) to fortify the performance of secrecy. Motivated by the need for efficient and effective training of the fully connected layers in the RL network, we employ an improved chimp optimization algorithm (IChOA) to update the parameters of the RL. By integrating the IChOA into the training process, the RL agent is expected to learn more robust policies faster and with better convergence properties compared to standard optimization algorithms. This can lead to improved performance in complex SG environments, where the agent must make decisions that enhance the security and efficiency of the network. We compared the performance of our proposed method (IChOA-RL) with several state-of-the-art machine learning (ML) algorithms, including recurrent neural network (RNN), long short-term memory (LSTM), K-nearest neighbors (KNN), support vector machine (SVM), improved crow search algorithm (I-CSA), and grey wolf optimizer (GWO). Extensive simulations demonstrate the efficacy of our approach compared to the related works, showcasing significant improvements in secrecy capacity rates under various network conditions. 
The proposed IChOA-RL exhibits superior performance compared to other algorithms in various aspects, including the scalability of the NOMA communication system, accuracy, coefficient of determination (<inline-formula id="ieqn-1"><mml:math id="mml-ieqn-1"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>), root mean square error (RMSE), and convergence trend. For our dataset, the IChOA-RL architecture achieved a coefficient of determination of 95.77% and an accuracy of 97.41% on the validation dataset. This was accompanied by the lowest RMSE (0.95), indicating very precise predictions with minimal error.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Smart grid communication</kwd>
<kwd>secrecy rate optimization</kwd>
<kwd>reinforcement learning</kwd>
<kwd>improved chimp optimization algorithm</kwd>
</kwd-group>
<funding-group>
<award-group id="awg1">
<funding-source>Ministerio de Ciencia e Investigaci&#x00F3;n</funding-source>
</award-group>
</funding-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>The smart grid (SG) represents a transformative leap in energy management, integrating advanced digital technology into the traditional power grid to enhance efficiency [<xref ref-type="bibr" rid="ref-1">1</xref>&#x2013;<xref ref-type="bibr" rid="ref-3">3</xref>], reliability, and sustainability [<xref ref-type="bibr" rid="ref-4">4</xref>]. As an integral component of this modernization, communication technologies play a pivotal role, facilitating real-time data exchange and control across various grid components [<xref ref-type="bibr" rid="ref-5">5</xref>]. Within this context, non-orthogonal multiple access (NOMA) has emerged as a significant advancement, offering a paradigm shift in SG communications [<xref ref-type="bibr" rid="ref-6">6</xref>]. NOMA stands out by enabling multiple users to share the same frequency resources, thereby drastically increasing spectral efficiency and network capacity [<xref ref-type="bibr" rid="ref-7">7</xref>]. This is particularly crucial in SG environments, where the need to simultaneously connect a multitude of devices, such as smart meters (SMs) and renewable energy sources, is ever-growing. By efficiently managing these dense and diverse communication demands, NOMA not only addresses the scalability challenges of the SG but also contributes to the overall optimization of energy distribution and consumption, heralding a new era of intelligent energy management [<xref ref-type="bibr" rid="ref-8">8</xref>].</p>
<p>Security concerns in SG are paramount, given the critical nature of energy infrastructure and the sensitive data involved in its operation [<xref ref-type="bibr" rid="ref-9">9</xref>]. As SGs become increasingly interconnected and reliant on wireless communications, they become vulnerable to various cyber threats [<xref ref-type="bibr" rid="ref-10">10</xref>&#x2013;<xref ref-type="bibr" rid="ref-12">12</xref>]. One notable security threat in the SG neighborhood area networks (NAN) is the risk of eavesdropping and impersonation attacks. For instance, an attacker might position themselves as an eavesdropper within the communication range of a neighborhood gateway (NG) and the SMs it controls. By intercepting the communication, the attacker could gain unauthorized access to confidential information, such as consumption data or control commands. More alarmingly, they could impersonate the NG, sending fraudulent signals or commands to the SMs. Such an attack could lead to severe consequences, including the disruption of power distribution, manipulation of billing data, or even causing physical damage to the grid infrastructure. This scenario underscores the critical need for robust security mechanisms in SG communications, to prevent unauthorized access and ensure the integrity and reliability of the energy supply chain [<xref ref-type="bibr" rid="ref-13">13</xref>].</p>
<p>The importance of secrecy performance analysis in designing security schemes for SG communications cannot be overstated [<xref ref-type="bibr" rid="ref-14">14</xref>&#x2013;<xref ref-type="bibr" rid="ref-16">16</xref>], particularly in the context of emerging technologies like NOMA. Secrecy performance analysis is crucial for evaluating how well a communication system can protect against unauthorized interception and ensure the confidentiality of transmitted data [<xref ref-type="bibr" rid="ref-16">16</xref>&#x2013;<xref ref-type="bibr" rid="ref-18">18</xref>]. A key metric in this analysis is the secrecy capacity, which is defined as the maximum rate at which information can be reliably transmitted to the intended receiver while ensuring that an eavesdropper gains negligible information [<xref ref-type="bibr" rid="ref-19">19</xref>&#x2013;<xref ref-type="bibr" rid="ref-21">21</xref>]. In NOMA SG communication, optimizing secrecy capacity poses a unique challenge. NOMA systems are inherently designed to allow multiple users to share the same frequency resources, which increases the complexity of maintaining secure communications [<xref ref-type="bibr" rid="ref-22">22</xref>,<xref ref-type="bibr" rid="ref-23">23</xref>]. The shared spectrum means that the signals intended for legitimate users can be more susceptible to interception by eavesdroppers. Optimizing secrecy capacity in this context involves not only enhancing the signal strength at the intended receivers but also minimizing the information leakage to potential eavesdroppers [<xref ref-type="bibr" rid="ref-24">24</xref>,<xref ref-type="bibr" rid="ref-25">25</xref>]. 
This requires sophisticated strategies that can dynamically adapt to the varying channel conditions and user positions typical in SG environments, ensuring robust and secure communication against the backdrop of NOMA&#x2019;s spectral efficiency benefits [<xref ref-type="bibr" rid="ref-26">26</xref>&#x2013;<xref ref-type="bibr" rid="ref-28">28</xref>].</p>
<p>There is limited research on applying deep learning (DL) and reinforcement learning (RL) models to improve secrecy in NOMA communication systems. Ali et al. [<xref ref-type="bibr" rid="ref-14">14</xref>] developed advanced resource allocation strategies for future communication systems, focusing on maximizing the total transmission rate within a restricted power budget and ensuring a necessary power differential among users for effective NOMA deployment. They introduced a deep neural network (DNN) framework to determine a combined power allocation strategy for both source and relay nodes. To support the training and validation of the DNN, they also obtained an optimal solution using convex optimization methods, which served as a benchmark to evaluate the DNN solution&#x2019;s effectiveness. It was found that the DNN solution delivers promising outcomes in terms of both sum rate and computational efficiency.</p>
<p>Given the notable gap in SG literature regarding the lack of a robust secrecy performance optimization scheme in NOMA communications, this paper introduces a pioneering approach based on RL to fortify this critical aspect. Recognizing the complexity and dynamism inherent in SG communication systems, especially under the NOMA paradigm, our research proposes leveraging the adaptive and predictive capabilities of RL. RL is selected over other machine learning (ML) methods for secrecy optimization in SG communications due to its distinct capabilities in handling dynamic and complex environments. Unlike static ML models such as K-nearest neighbors (KNN) and support vector machine (SVM), RL excels in adapting to evolving network conditions by continuously learning optimal policies through interactions with the environment, making it particularly suited for the unpredictable nature of SGs. Additionally, RL&#x2019;s proficiency in sequential decision making allows it to optimize long-term secrecy performance by considering the future implications of current actions, which is crucial for maintaining secure communication over time. This novel approach is specifically designed to enhance the secrecy capacity rate, a vital metric of secrecy performance, in NOMA communications within SG environments. By employing RL algorithms, our method aims to adjust communication strategies intelligently and dynamically in response to varying network conditions and potential security threats. This allows for the optimization of secrecy capacity rates, ensuring that sensitive data transmitted across the SG remains secure from eavesdroppers and malicious actors. Our research, therefore, stands at the forefront of addressing a critical, yet previously unexplored, aspect of SG communications, offering a significant contribution to the advancement of secure and resilient SG networks.</p>
<p>The training process of a fully connected neural network, commonly used in RL, is a critical phase where the network learns to approximate the optimal policy for decision-making. In RL, a fully connected neural network, also known as a deep Q network (DQN) when used in Q-learning, is often responsible for mapping states to action values. The quality of this mapping directly influences the agent&#x2019;s ability to make intelligent decisions that maximize the cumulative reward over time. The importance of the training process lies in its ability to capture the complex relationships between the actions, the state of the environment, and the received rewards. Proper training ensures that the neural network generalizes well to unseen states, enabling the RL agent to perform well across the entire state space of the problem. Motivated by the need for efficient and effective training of the fully connected layers in the RL network, we employ an improved chimp optimization algorithm (IChOA), which is inspired by the intelligent hunting behavior of chimpanzees in nature, to update the parameters of the neural network.</p>
<p>The choice of combining RL with IChOA to enhance secrecy performance in SGs is driven by the need to address the complex and dynamic nature of SG communication environments, particularly under the NOMA paradigm. SGs are characterized by their high connectivity and reliance on wireless communication, which inherently increases the risk of eavesdropping and other security threats. RL offers a robust framework for optimizing secrecy capacity by dynamically adapting communication strategies to counteract these threats, ensuring that sensitive data remains secure. However, the effectiveness of RL heavily depends on the efficiency of its training process, where the optimization of neural network parameters plays a crucial role in determining the agent&#x2019;s ability to make intelligent decisions under varying network conditions. The integration of IChOA into the RL framework is justified by its ability to enhance the training process, specifically by improving the convergence speed and robustness of the learned policies. This combination allows the RL agent to learn more effective policies faster and with greater accuracy, thereby improving the overall secrecy performance. By comparing the proposed IChOA-RL method against other state-of-the-art DL and ML algorithms, the paper demonstrates that this approach not only surpasses traditional methods in terms of scalability, accuracy, and convergence but also provides a more effective solution for the specific challenges of optimizing secrecy in SG communications.</p>
<sec id="s1_1">
<label>1.1</label>
<title>Related Works</title>
<p>Several research efforts have focused on investigating the physical layer security (PLS) performance of SG communications in recent years. Camponogara et al. [<xref ref-type="bibr" rid="ref-29">29</xref>] explored the benefits of hybrid power line communication (PLC)/wireless channels for improving PLS in low-bit-rate applications. They derived mathematical formulations for the average secrecy capacity (ASC) and secrecy outage probability (SOP), revealing the advantages of hybrid PLC/wireless models in enhancing PLS when eavesdroppers utilize a single data communication interface. Salem et al. [<xref ref-type="bibr" rid="ref-30">30</xref>] delved into the PLS of cooperative relaying PLC systems with artificial noise. They derived expressions for ASC, highlighting the potential of cooperative relaying to significantly enhance the security of PLC systems. Building on this, Salem et al. [<xref ref-type="bibr" rid="ref-31">31</xref>] extended their study to consider PLS in correlated log-normal cooperative PLC networks. Their work analyzed the impact of background and impulsive noise components, providing mathematical insights into ASC and SOP under various network scenarios.</p>
<p>Odeyemi et al. [<xref ref-type="bibr" rid="ref-32">32</xref>] introduced a dynamic wide area network (WAN) for SGs featuring a friendly jammer to enhance network secrecy. They derived closed-form expressions for connection SOP and ASC, showcasing the network&#x2019;s enhanced security performance. Atallah et al. [<xref ref-type="bibr" rid="ref-33">33</xref>] investigated PLS performance in wireless sensor networks within SG environments. They considered the impact of destination-assisted jamming on secrecy performance metrics and derived analytical expressions for SOP, revealing the potential for significant improvement in security using jamming techniques. El-Shafie et al. [<xref ref-type="bibr" rid="ref-34">34</xref>] studied the influence of wireless network&#x2019;s PLS and reliability on demand-side management in SGs. Their work explored the tradeoff between security and reliability, proposing artificial-noise-aided schemes and encoding strategies to enhance security and reliability in SG. Mohan et al. [<xref ref-type="bibr" rid="ref-35">35</xref>] examined PLS in low-frequency PLC systems, focusing on ASC and SOP. They considered both the independent and correlated log-normal channel distributions, incorporating the impact of impulsive noise and various network parameters.</p>
<p>Kaveh et al. [<xref ref-type="bibr" rid="ref-18">18</xref>] delved into the application of reconfigurable intelligent surfaces (RIS) to enhance the PLS in SG communications. The research addresses the vulnerabilities of SG communication links to eavesdropping and unauthorized access, proposing RIS as a solution to improve secrecy performance. By integrating RIS with reflecting elements in the SG environment, alongside SMs, neighborhood gateways, and potential eavesdroppers, the authors derive closed-form expressions for SOP and ASC. They analyze the signal-to-noise ratio (SNR) distributions at both the gateway and the eavesdropper, providing a comprehensive evaluation of the impact of various system parameters. Their asymptotic analysis under high-SNR conditions, supported by Monte Carlo simulations, validates that RIS can significantly enhance the secrecy performance of SG communications, outperforming conventional scenarios without RIS. Faheem et al. [<xref ref-type="bibr" rid="ref-36">36</xref>] introduced a framework utilizing smart contracts within a Solana blockchain-based industrial wireless sensor network (BCWSN), referred to as the advanced Solana blockchain (ABC), specifically designed for distributed energy resources (DERs) in SGs. This ABC framework facilitates robust and secure real-time control and monitoring of DERs within the SGs. Performance evaluations and security analyses demonstrated that the ABC scheme is secure, dependable, and efficient for lightweight data sharing between DERs in SGs.</p>
<p>However, while some studies have focused on analyzing the secrecy performance in SG communications under various system and channel conditions, there has been limited research on developing optimization approaches specifically aimed at optimizing the secrecy rate in SG. Mensi et al. [<xref ref-type="bibr" rid="ref-37">37</xref>] investigated the security challenges posed by the Internet of Things (IoT) and bidirectional communications in SG environments. Given the increasing data transmission demands due to the proliferation of IoT devices, the study emphasizes the need for high data rate technologies like Sub-6 GHz, millimeter-wave (mmWave), and massive multiple-input multiple-output (MIMO). The authors address the vulnerabilities of IoT-enabled SGs to eavesdropping and jamming attacks, proposing a hybrid beamforming design to enhance secrecy capacity. Unlike previous methods that increase secrecy capacity through random power augmentation or system combiner settings, this research utilizes the Gradient Ascent algorithm to optimize the beamforming strategy, considering both fixed and variable transmit power scenarios. The study&#x2019;s numerical results validate the efficacy of their approach, highlighting its potential for improving security in SG communications. Although the work by Mensi et al. has proposed a method to optimize secrecy performance in SG, there remains a need for developing a more robust optimization approach to enhance the secrecy rate in SG. The Gradient Ascent algorithm, as used by Mensi et al., can get stuck in local maxima. Therefore, a novel approach with a stronger capability for exploration and exploitation in such problem environments would likely yield a more optimal secrecy rate.</p>
</sec>
<sec id="s1_2">
<label>1.2</label>
<title>Paper Contributions</title>
<p><list list-type="bullet">
<list-item>
<p>This study introduces a new IChOA-RL model aimed at optimizing secrecy performance for secure NOMA communication within an SG. The IChOA is used to optimize the parameters (weights and biases) of the RL.</p></list-item>
<list-item>
<p>In the proposed IChOA, a new V-shaped transfer function is introduced to enhance the ChOA. The primary benefit of IChOA is its proficiency in balancing exploration and exploitation.</p></list-item>
<list-item>
<p>The effectiveness of the proposed IChOA-RL model is evaluated by comparing it with various advanced ML algorithms, such as recurrent neural network (RNN), long short-term memory (LSTM), KNN, SVM, improved crow search algorithm (I-CSA), and grey wolf optimizer (GWO).</p></list-item>
<list-item>
<p>The evaluation of the results utilizes multiple criteria such as the scalability of the NOMA communication system, accuracy, coefficient of determination (<inline-formula id="ieqn-3"><mml:math id="mml-ieqn-3"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>), root mean square error (RMSE), and convergence curves. Simulation results indicate that the IChOA-RL model surpasses other models in performance. The use of IChOA in the training process of neural networks shows that it can significantly speed up learning and convergence to optimal policies, ensuring efficient power resource utilization while maintaining high security levels.</p></list-item>
</list></p>
</sec>
<sec id="s1_3">
<label>1.3</label>
<title>Main Objectives of the Study</title>
<p><list list-type="bullet">
<list-item>
<p>Enhance secrecy performance in SG communications: This study aims to develop a novel RL framework, integrated with an IChOA, to optimize the secrecy capacity rate in SG NAN. By leveraging advanced RL algorithms, the framework seeks to intelligently adapt to dynamic communication environments, ensuring secure NOMA SG communication.</p></list-item>
<list-item>
<p>Improve training efficiency and convergence: Another key objective is to improve the training efficiency and convergence properties of the RL network through the integration of the IChOA. This integration is expected to enable the RL agent to learn more robust policies faster compared to standard algorithms, thereby enhancing the overall performance in complex SG environments.</p></list-item>
<list-item>
<p>Compare and validate performance: The study also aims to extensively compare and validate the performance of the proposed IChOA-RL method against several state-of-the-art ML algorithms, including RNN, LSTM, KNN, SVM, I-CSA, and GWO. The objective is to demonstrate significant improvements in secrecy capacity rates, scalability, accuracy, R&#x00B2;, RMSE, and convergence trends under various network conditions.</p></list-item>
</list></p>
</sec>
<sec id="s1_4">
<label>1.4</label>
<title>Paper Organization</title>
<p>The organization of our paper is as follows. In <xref ref-type="sec" rid="s2">Section 2</xref>, we present the detailed architecture of the studied system model and formulate the specific problem of optimizing the secrecy capacity rate. This section also introduces our novel RL-based approach, explaining how it addresses the challenges identified in the problem formulation. <xref ref-type="sec" rid="s3">Section 3</xref> demonstrates the effectiveness of our proposed solution through rigorous simulation scenarios and provides a comparative analysis with existing methods. Finally, <xref ref-type="sec" rid="s4">Section 4</xref> summarizes our key findings and discusses their implications for the future of secure SG communications.</p>
</sec>
</sec>
<sec id="s2">
<label>2</label>
<title>Research Method and Modeling</title>
<p>This section delineates the proposed RL technique aimed at optimizing the secrecy rate within the established SG NOMA communication system. In the context of RL, the IChOA is utilized to optimize the weights and biases of the fully connected neural network. It updates the network parameters in a way that the resultant policy maximizes the expected rewards.</p>
<sec id="s2_1">
<label>2.1</label>
<title>System Model and Problem Formulation</title>
<p>The system under consideration is an SG NOMA communication model designed for secure message broadcasting from an NG to a set of K SMs under its control in an NAN, indexed by <inline-formula id="ieqn-4"><mml:math id="mml-ieqn-4"><mml:mi>S</mml:mi><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:math></inline-formula> <inline-formula id="ieqn-5"><mml:math id="mml-ieqn-5"><mml:mi>S</mml:mi><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mi>S</mml:mi><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. The NG serves as a central hub that uses NOMA to transmit critical control messages to the SMs, which are the end-users of the grid. The system is under the threat of an eavesdropper (Eve) attempting to intercept the communications. The NOMA protocol employed allows multiple SMs to be served simultaneously over the same frequency band by exploiting the power domain. Each SM is assigned a different power level based on the channel state information (CSI), which is assumed to be perfectly known at the NG. The signals are superimposed when transmitted by the NG and are separated at the receiver side using successive interference cancellation (SIC), which requires the SMs to decode and subtract signals not intended for them before decoding their own.</p>
<p>The channel between the NG and each SM (<inline-formula id="ieqn-6"><mml:math id="mml-ieqn-6"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>), as well as between the NG and Eve (<inline-formula id="ieqn-7"><mml:math id="mml-ieqn-7"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>), is subject to Rayleigh fading, characterized by a probability density function of the signal&#x2019;s amplitude. This fading model is appropriate for environments where multiple scattered paths exist without a line of sight. The channel coefficients are modeled as complex Gaussian random variables with zero mean and unit variance, representing the rapid changes in the amplitude and phase of the signals due to multipath propagation. The SG environment is dynamic, with the channel conditions varying due to factors such as physical obstructions, weather changes, and varying electrical load. We assume NG is a multi-antenna user while SMs and Eve are single-antenna users. <xref ref-type="fig" rid="fig-1">Fig. 1</xref> depicts the studied system model in this paper.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>The studied SG NOMA communication system model</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_56823-fig-1.tif"/>
</fig>
<p>Assuming, without loss of generality, that the users are ordered by their channel gain magnitudes, we have an ordered sequence from the weakest to the strongest channel gain relative to the eavesdropper&#x2019;s channel. In this NOMA setup, the NG broadcasts signals using a superposition coding strategy that combines the power-scaled messages of all SMs, where <inline-formula id="ieqn-8"><mml:math id="mml-ieqn-8"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents the power allocation coefficient for the <italic>i-</italic>th SM, and <inline-formula id="ieqn-9"><mml:math id="mml-ieqn-9"><mml:mi>P</mml:mi></mml:math></inline-formula> denotes the total transmission power available at the NG. Each SM&#x2019;s message, <inline-formula id="ieqn-10"><mml:math id="mml-ieqn-10"><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, is normalized such that the expected value of the message&#x2019;s power is unity. Following the NOMA protocol, we order the power allocation coefficients such that <inline-formula id="ieqn-11"><mml:math id="mml-ieqn-11"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2265;</mml:mo><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2265;</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>&#x2265;</mml:mo><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, with the sum of these coefficients equaling unity. 
The received signal at the <italic>i</italic>-th SM, <inline-formula id="ieqn-12"><mml:math id="mml-ieqn-12"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and at the eavesdropper, <inline-formula id="ieqn-13"><mml:math id="mml-ieqn-13"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, are expressed as <xref ref-type="disp-formula" rid="eqn-1">Eqs. (1)</xref> and <xref ref-type="disp-formula" rid="eqn-2">(2)</xref>.
<disp-formula id="eqn-1"><label>(1)</label><mml:math id="mml-eqn-1" display="block"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:msqrt><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>P</mml:mi></mml:msqrt><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>n</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula>
<disp-formula id="eqn-2"><label>(2)</label><mml:math id="mml-eqn-2" display="block"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:msqrt><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>P</mml:mi></mml:msqrt><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>n</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula>where <inline-formula id="ieqn-14"><mml:math id="mml-ieqn-14"><mml:msub><mml:mi>n</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-15"><mml:math id="mml-ieqn-15"><mml:msub><mml:mi>n</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are the zero-mean additive white Gaussian noise (AWGN) components affecting the <italic>i</italic>-th SM and the eavesdropper, respectively, both modeled as <inline-formula id="ieqn-16"><mml:math id="mml-ieqn-16"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msup><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>. Following the principles of NOMA, each SM in the system, specifically the <italic>i</italic>-th SM, employs the SIC method to accurately detect its dedicated message. 
This is executed by sequentially decoding messages intended for SMs with inferior channel gains&#x2014;namely, any <italic>k</italic>-th SM where <inline-formula id="ieqn-17"><mml:math id="mml-ieqn-17"><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x003C;</mml:mo><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>&#x2014;and then removing these decoded messages from the received signal. Conversely, signals meant for users with superior channel gains compared to the <italic>i</italic>-th SM are treated as noise. To guarantee the effective application of SIC at the <italic>i</italic>-th SM&#x2019;s receiver, it is a prerequisite that the data rate at which the <italic>i</italic>-th SM decodes the <italic>k</italic>-th SM&#x2019;s message (<inline-formula id="ieqn-18"><mml:math id="mml-ieqn-18"><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x27F6;</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>) must not fall below the target data rate (<inline-formula id="ieqn-19"><mml:math id="mml-ieqn-19"><mml:mrow><mml:mover><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>) set for the <italic>k</italic>-th SM. 
When the <italic>i</italic>-th SM successfully decodes its own message, the achievable data rate for this user, denoted as <inline-formula id="ieqn-20"><mml:math id="mml-ieqn-20"><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and expressed in bits per second per Hertz (b/s/Hz), is calculated using <xref ref-type="disp-formula" rid="eqn-3">Eq. (3)</xref>.
<disp-formula id="eqn-3"><label>(3)</label><mml:math id="mml-eqn-3" display="block"><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mi>&#x03C1;</mml:mi><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mi>&#x03C1;</mml:mi><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mi>i</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula>where <inline-formula id="ieqn-21"><mml:math id="mml-ieqn-21"><mml:mrow><mml:mi mathvariant="normal">&#x03C1;</mml:mi></mml:mrow></mml:math></inline-formula> represents the signal-to-noise ratio (SNR) at the receiver, and <inline-formula id="ieqn-22"><mml:math id="mml-ieqn-22"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the power allocation coefficient for the <italic>i</italic>-th SM.</p>
<p>In addressing the eavesdropper&#x2019;s capabilities, the approach taken is to apply the SIC method to discern the messages intended for the <italic>i</italic>-th authorized SM. The eavesdropper can decode these messages at a rate represented by <inline-formula id="ieqn-23"><mml:math id="mml-ieqn-23"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>. It is acknowledged that the eavesdropper might be among the NOMA user group or an external entity; hence, the formula for <inline-formula id="ieqn-24"><mml:math id="mml-ieqn-24"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> will vary accordingly and will be elaborated upon in subsequent sections. The secrecy rate of the <italic>i</italic>-th NOMA SM is defined as achievable when there exists an encoding strategy that can provide both reliable communication to the intended user and complete secrecy from the eavesdropper. The secrecy rate of the <italic>i</italic>-th authorized SM, denoted as <inline-formula id="ieqn-25"><mml:math id="mml-ieqn-25"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>, is the excess of the rate at which the <italic>i</italic>-th SM can communicate over the eavesdropper&#x2019;s decoding rate and is mathematically represented as <xref ref-type="disp-formula" rid="eqn-4">Eq. (4)</xref>.
<disp-formula id="eqn-4"><label>(4)</label><mml:math id="mml-eqn-4" display="block"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mspace width="0pt" /><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mspace width="0pt" /><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:mo>+</mml:mo></mml:mrow></mml:msup></mml:math></disp-formula>where <inline-formula id="ieqn-26"><mml:math id="mml-ieqn-26"><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the SM&#x2019;s achievable rate as previously defined, and the operation <inline-formula id="ieqn-27"><mml:math id="mml-ieqn-27"><mml:msup><mml:mrow><mml:mo>[</mml:mo><mml:mi>x</mml:mi><mml:mspace width="0pt" /><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:mo>+</mml:mo></mml:mrow></mml:msup></mml:math></inline-formula> signifies the positive part of <inline-formula id="ieqn-28"><mml:math id="mml-ieqn-28"><mml:mi>x</mml:mi></mml:math></inline-formula>, calculated as <inline-formula id="ieqn-29"><mml:math id="mml-ieqn-29"><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>. This definition is fundamental to ensure that a non-negative secrecy rate is maintained, providing a metric for secure communication.</p>
<p>We proceed under the assumption that complete CSI for all bona fide SMs is accessible to NG, and likewise, the CSI of the eavesdropper is also known. It is important to note that, through the use of SIC, the SM with the superior channel gain is capable of decoding the transmissions intended for other NOMA SMs that possess weaker channel gains. Therefore, in a scenario where there exists an internal adversary, the only SM that can achieve a secrecy rate greater than zero is the SM with the highest channel gain, identified as the <italic>K</italic>-th SM. In the most adverse situation, where the penultimate user, or <italic>(K&#x2212;1)</italic>-th SM, is the eavesdropper aiming to intercept the <italic>K</italic>-th SM&#x2019;s messages, the secrecy rate for every legitimate SM can be represented as <xref ref-type="disp-formula" rid="eqn-5">Eq. (5)</xref>.
<disp-formula id="eqn-5"><label>(5)</label><mml:math id="mml-eqn-5" display="block"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>log</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mi>&#x03C1;</mml:mi><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mi>&#x03C1;</mml:mi><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>K</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mi>K</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mtext>otherwise</mml:mtext></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>According to [<xref ref-type="bibr" rid="ref-28">28</xref>], in the worst-case scenario, the analytical expression for the <italic>i-</italic>th SM&#x2019;s secrecy rate under the condition of asymptotically high SNR, that is, as <inline-formula id="ieqn-30"><mml:math id="mml-ieqn-30"><mml:mrow><mml:mi mathvariant="normal">&#x03C1;</mml:mi></mml:mrow></mml:math></inline-formula> approaches infinity, can be delineated as <xref ref-type="disp-formula" rid="eqn-6">Eq. (6)</xref>.
<disp-formula id="eqn-6"><label>(6)</label><mml:math id="mml-eqn-6" display="block"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mfrac><mml:msub><mml:mi>log</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mi mathvariant="normal">&#x0393;</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x00D7;</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>2</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>s</mml:mi></mml:mtd></mml:mtr></mml:mtable><mml:mo>)</mml:mo></mml:mrow><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msup><mml:mmultiscripts><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:none/><mml:mprescripts/><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:none/></mml:mmultiscripts><mml:mrow><mml:mo>[</mml:mo><mml:mtable rowspacing="4pt" 
columnspacing="1em"><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>2</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable><mml:mo>;</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>s</mml:mi><mml:mo>]</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula>where <inline-formula id="ieqn-31"><mml:math id="mml-ieqn-31"><mml:mrow><mml:mi mathvariant="normal">&#x0393;</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mo>.</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> and <inline-formula id="ieqn-32"><mml:math id="mml-ieqn-32"><mml:mmultiscripts><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:none/><mml:mprescripts/><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:none/></mml:mmultiscripts><mml:mrow><mml:mo>[</mml:mo><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mo>.</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable><mml:mo>;</mml:mo><mml:mo>.</mml:mo><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> denote the Gamma function and the generalized hypergeometric function, respectively. The main objective of this paper is to maximize the secrecy rate in <xref ref-type="disp-formula" rid="eqn-6">Eq. (6)</xref> by using a novel RL technique.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Basic ChOA</title>
<p>The ChOA is a meta-heuristic technique that draws inspiration from the way chimpanzees forage for food and resources. Introduced in 2020 by Khishe and Mosavi, this algorithm emulates the foraging patterns of chimpanzees, including their social interactions and learning processes. ChOA models the collaborative hunting strategy of chimpanzees, where they exhibit roles such as the driver, chaser, barrier, and attacker. In a coordinated hunting strategy, different roles are played by chimpanzees [<xref ref-type="bibr" rid="ref-38">38</xref>&#x2013;<xref ref-type="bibr" rid="ref-40">40</xref>]. Driver chimps focus on tracking prey without directly approaching it, primarily to monitor its movements and pinpoint its location. Barrier chimps, often positioned in trees, strategically place themselves to create impediments that hinder the prey&#x2019;s progress, effectively steering it away from certain escape routes. Chaser chimps leverage their speed and agility to quickly close in on the prey, enhancing the prospects of a successful catch. Lastly, attacker chimps evaluate the prey&#x2019;s behavior to anticipate possible escape paths, positioning themselves to reroute the prey towards the chasers, thus boosting the chances of capture. These roles are translated into explorative and exploitative steps in the algorithm to find the best solutions. <xref ref-type="fig" rid="fig-2">Fig. 2</xref> shows two primary stages of the hunting procedure. ChOA is known for its balance between exploration, to find new potential areas in the search space, and exploitation, to refine the solutions in promising areas. <xref ref-type="disp-formula" rid="eqn-7">Eqs. (7)</xref>&#x2013;<xref ref-type="disp-formula" rid="eqn-11">(11)</xref> outline the formulas used for driving and chasing the prey.</p>
<p><disp-formula id="eqn-7"><label>(7)</label><mml:math id="mml-eqn-7" display="block"><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mi>c</mml:mi><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>m</mml:mi><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:math></disp-formula>
<disp-formula id="eqn-8"><label>(8)</label><mml:math id="mml-eqn-8" display="block"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>a</mml:mi><mml:mi>d</mml:mi></mml:math></disp-formula>
<disp-formula id="eqn-9"><label>(9)</label><mml:math id="mml-eqn-9" display="block"><mml:mi>a</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mi>f</mml:mi><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>f</mml:mi></mml:math></disp-formula>
<disp-formula id="eqn-10"><label>(10)</label><mml:math id="mml-eqn-10" display="block"><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></disp-formula>
<disp-formula id="eqn-11"><label>(11)</label><mml:math id="mml-eqn-11" display="block"><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>u</mml:mi><mml:mi>e</mml:mi></mml:math></disp-formula>where <inline-formula id="ieqn-33"><mml:math id="mml-ieqn-33"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the prey&#x2019;s position vector; <inline-formula id="ieqn-34"><mml:math id="mml-ieqn-34"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> denotes the chimp&#x2019;s position vector; <inline-formula id="ieqn-35"><mml:math id="mml-ieqn-35"><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mtext>&#xA0;and&#xA0;</mml:mtext></mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> are the random vectors <inline-formula id="ieqn-36"><mml:math id="mml-ieqn-36"><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>; <inline-formula id="ieqn-37"><mml:math id="mml-ieqn-37"><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mrow><mml:mtext>&#xA0;and&#xA0;</mml:mtext></mml:mrow><mml:mi>m</mml:mi></mml:math></inline-formula> are the coefficient vectors; <inline-formula id="ieqn-38"><mml:math 
id="mml-ieqn-38"><mml:mi>m</mml:mi></mml:math></inline-formula> indicates a chaotic vector; and <inline-formula id="ieqn-39"><mml:math id="mml-ieqn-39"><mml:mi>f</mml:mi></mml:math></inline-formula> is the dynamic vector <inline-formula id="ieqn-40"><mml:math id="mml-ieqn-40"><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>2.5</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>Hunting process in basic ChOA</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_56823-fig-2.tif"/>
</fig>
<p>During the hunting phase, chimpanzees initially locate their prey with the help of barrier, driver, and chaser chimps. The prey&#x2019;s position is subsequently determined by barrier, attacker, chaser, and driver chimps, while other chimpanzees adjust their positions in response to the prey. These stages are expressed in <xref ref-type="disp-formula" rid="eqn-12">Eqs. (12)</xref>&#x2013;<xref ref-type="disp-formula" rid="eqn-14">(14)</xref>.
<disp-formula id="eqn-12"><label>(12)</label><mml:math id="mml-eqn-12" display="block"><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>A</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>k</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>A</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>k</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mi>X</mml:mi><mml:mo>|</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>B</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>B</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mi>X</mml:mi><mml:mo>|</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mi>X</mml:mi><mml:mo>|</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:mi>X</mml:mi><mml:mo>|</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula>
<disp-formula id="eqn-13"><label>(13)</label><mml:math id="mml-eqn-13" display="block"><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>A</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>k</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>A</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>k</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>B</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>B</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula>
<disp-formula id="eqn-14"><label>(14)</label><mml:math id="mml-eqn-14" display="block"><mml:mi>X</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mn>4</mml:mn></mml:mfrac></mml:math></disp-formula>where <inline-formula id="ieqn-41"><mml:math id="mml-ieqn-41"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>A</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>k</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents the best search agent, <inline-formula id="ieqn-42"><mml:math id="mml-ieqn-42"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>B</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the second-best search agent, <inline-formula id="ieqn-43"><mml:math id="mml-ieqn-43"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the third-best search agent, <inline-formula id="ieqn-44"><mml:math id="mml-ieqn-44"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the fourth-best search agent, and <inline-formula id="ieqn-45"><mml:math id="mml-ieqn-45"><mml:mi>X</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is the updated position of each chimp. <xref ref-type="fig" rid="fig-3">Fig. 3</xref> illustrates the position updating mechanism in the basic ChOA. This figure demonstrates how different roles assigned to chimpanzees influence the movement towards the prey during the optimization process.</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>Position updating in basic ChOA</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_56823-fig-3.tif"/>
</fig>
<p>Ultimately, once the hunt is over, all chimpanzees converge to attack the prey, driven by sexual motivation, irrespective of their roles. These sexual motivations are represented using chaotic maps, as shown in <xref ref-type="disp-formula" rid="eqn-15">Eq. (15)</xref>.
<disp-formula id="eqn-15"><label>(15)</label><mml:math id="mml-eqn-15" display="block"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="center center" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>a</mml:mi><mml:mi>d</mml:mi></mml:mtd><mml:mtd><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mspace width="thinmathspace" /><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.5</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi mathvariant="normal">&#x005F;</mml:mi><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>u</mml:mi><mml:mi>e</mml:mi></mml:mtd><mml:mtd><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mspace width="thinmathspace" /><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x2265;</mml:mo><mml:mn>0.5</mml:mn></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula>where <inline-formula id="ieqn-46"><mml:math id="mml-ieqn-46"><mml:mi>&#x03BC;</mml:mi></mml:math></inline-formula> is the random number &#x2208; [0, 1].</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Improved ChOA</title>
<p>The creation of a new binary version of the ChOA is motivated by the growing need for more robust and adaptable optimization algorithms in various fields such as science, engineering, and industry. Originally inspired by chimpanzees&#x2019; social hunting tactics, the standard ChOA has been effective in solving continuous optimization problems. However, its effectiveness in dealing with discrete variables is limited. This limitation underscores the necessity to improve the ChOA framework to adequately address discrete optimization challenges through a binary adaptation. As a result, there is an ongoing effort among researchers and industry professionals to enhance or develop new techniques that increase the efficiency and effectiveness of optimization processes.</p>
<p>Binary encoding streamlines the representation of variables, especially in optimization scenarios where variables are discrete. By using a binary format, ChOA avoids the necessity for continuous parameter adjustments, facilitating its application across different problem areas. The binary encoding of ChOA typically results in lower computational complexity compared to its continuous variable counterpart. This decrease in complexity can lead to quicker convergence and reduced computational demands, making ChOA more practical for addressing optimization challenges, particularly in scenarios with extensive solution spaces.</p>
<p>In binary algorithms, the transfer function plays a pivotal role in transitioning from a continuous to a discrete search space, where it handles binary decision variables. This function is vital because it enables the algorithm to switch between binary states, accommodating scenarios where traditional algorithms primarily handle continuous variables. The design of this function is critical to the algorithm&#x2019;s approach in navigating the search space, balancing the discovery of new opportunities (exploration) and focusing on promising solutions (exploitation). The ongoing development and enhancement of this transfer function are crucial for developing a successful binary meta-heuristic algorithm, as they significantly influence its search efficiency and convergence capabilities. Accordingly, our paper introduces a novel V-shaped transfer function to adapt the ChOA algorithm. In the suggested IChOA, the position update equation is defined as <xref ref-type="disp-formula" rid="eqn-16">Eq. (16)</xref>. To achieve this, a novel V-shaped transfer function is utilized as shown in <xref ref-type="disp-formula" rid="eqn-17">Eq. (17)</xref>.
<disp-formula id="eqn-16"><label>(16)</label><mml:math id="mml-eqn-16" display="block"><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="center left" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:mtd><mml:mtd><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#xA0;</mml:mtext><mml:mi>R</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mi>T</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mn>4</mml:mn></mml:mfrac></mml:mstyle><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:mtd><mml:mtd><mml:mrow><mml:mtext>&#xA0;otherwise&#xA0;</mml:mtext></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula>
<disp-formula id="eqn-17"><label>(17)</label><mml:math id="mml-eqn-17" display="block"><mml:mi>T</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>x</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mfrac><mml:mn>3</mml:mn><mml:mrow><mml:mn>2</mml:mn><mml:mi>&#x03C0;</mml:mi></mml:mrow></mml:mfrac><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>c</mml:mi><mml:mi>tan</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:mn>3</mml:mn><mml:mi>&#x03C0;</mml:mi></mml:mrow><mml:mn>5</mml:mn></mml:mfrac><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mi>&#x03C6;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:math></disp-formula>where, <inline-formula id="ieqn-47"><mml:math id="mml-ieqn-47"><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula> presents the updated binary position at <inline-formula id="ieqn-48"><mml:math id="mml-ieqn-48"><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> iteration; <inline-formula id="ieqn-49"><mml:math id="mml-ieqn-49"><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> denotes the binary position at <inline-formula id="ieqn-50"><mml:math id="mml-ieqn-50"><mml:mi>t</mml:mi></mml:math></inline-formula> iteration; <inline-formula id="ieqn-51"><mml:math id="mml-ieqn-51"><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> is the complement of <inline-formula id="ieqn-52"><mml:math 
id="mml-ieqn-52"><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>; <inline-formula id="ieqn-53"><mml:math id="mml-ieqn-53"><mml:mi>R</mml:mi></mml:math></inline-formula> is a random number <inline-formula id="ieqn-54"><mml:math id="mml-ieqn-54"><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>; <inline-formula id="ieqn-55"><mml:math id="mml-ieqn-55"><mml:mi>T</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>x</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the V-shaped transfer function; and <inline-formula id="ieqn-56"><mml:math id="mml-ieqn-56"><mml:mi>&#x03C6;</mml:mi></mml:math></inline-formula> is a threshold number <inline-formula id="ieqn-57"><mml:math id="mml-ieqn-57"><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mi>&#x03C0;</mml:mi><mml:mn>10</mml:mn></mml:mfrac></mml:mstyle><mml:mo>,</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mi>&#x03C0;</mml:mi><mml:mn>5</mml:mn></mml:mfrac></mml:mstyle><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>. This modification contributes to reduced computational complexity and faster convergence rates, particularly when dealing with large solution spaces or problems with binary constraints.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>The Proposed RL Technique</title>
<p>The primary goal of the RL algorithm is to dynamically adjust the power allocation coefficients <inline-formula id="ieqn-58"><mml:math id="mml-ieqn-58"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> for each SM in a way that maximizes the secrecy rate against a sophisticated eavesdropper. The RL framework is modeled as a Markov decision process (MDP), where at each decision epoch, the system state includes the current CSI of all SMs and the eavesdropper, represented by their respective channel gains <inline-formula id="ieqn-59"><mml:math id="mml-ieqn-59"><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> and <inline-formula id="ieqn-60"><mml:math id="mml-ieqn-60"><mml:msup><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. The action space consists of possible power allocation vectors <inline-formula id="ieqn-61"><mml:math id="mml-ieqn-61"><mml:mi>&#x03B3;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> within the power budget set by the NG&#x2019;s total transmission power <inline-formula id="ieqn-62"><mml:math id="mml-ieqn-62"><mml:mi>P</mml:mi></mml:math></inline-formula>.</p>
<p>MDP provides a structured way to model an environment in which an agent interacts and makes decisions over time. The core components of an MDP are states, actions, transition functions, reward functions, and policies. In the MDP framework, a state represents a specific situation or configuration of the environment. For SG communications, a state could encompass various factors such as the current security level, network traffic, and channel conditions. Actions are the decisions or moves that the agent can make in each state, such as adjusting transmission power or changing encryption parameters to enhance security. These actions lead to transitions between states, which are governed by the transition function. This function provides the probabilities of moving from one state to another, given a particular action, effectively modeling the dynamics of the environment.</p>
<p>The reward function is another critical component of the MDP framework. It assigns a numerical value to each state-action pair, representing the immediate feedback or benefit of taking a specific action in a given state. In the context of secrecy optimization in SGs, rewards could reflect improvements in the secrecy capacity rate, better energy efficiency, or other performance metrics. The MDP framework is particularly well-suited to problems like secrecy optimization in SGs because it explicitly accounts for the sequential nature of decision-making and the stochastic nature of the environment. By modeling the problem as an MDP, the RL agent can systematically explore different strategies and learn to make decisions that enhance security and efficiency over time. This approach contrasts with traditional machine learning methods, which may not fully capture the temporal and probabilistic aspects of the problem, making RL a powerful tool for optimizing secrecy rates in SG communications.</p>
<p>The RL agent&#x2019;s objective is to learn a policy <inline-formula id="ieqn-63"><mml:math id="mml-ieqn-63"><mml:mrow><mml:mi mathvariant="normal">&#x03C0;</mml:mi></mml:mrow></mml:math></inline-formula> that selects actions to maximize the cumulative discounted secrecy rate over time, defined as <xref ref-type="disp-formula" rid="eqn-18">Eq. (18)</xref> [<xref ref-type="bibr" rid="ref-41">41</xref>].
<disp-formula id="eqn-18"><label>(18)</label><mml:math id="mml-eqn-18" display="block"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03C0;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">&#x221E;</mml:mi></mml:mrow></mml:mrow></mml:munderover><mml:msup><mml:mi>&#x03B4;</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msup><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></disp-formula>where <inline-formula id="ieqn-64"><mml:math id="mml-ieqn-64"><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mo>.</mml:mo><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> is the expectation operator, <inline-formula id="ieqn-65"><mml:math id="mml-ieqn-65"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the instantaneous secrecy rate at time <inline-formula id="ieqn-66"><mml:math id="mml-ieqn-66"><mml:mi>t</mml:mi></mml:math></inline-formula>, and <inline-formula id="ieqn-67"><mml:math id="mml-ieqn-67"><mml:mrow><mml:mi mathvariant="normal">&#x03B4;</mml:mi></mml:mrow></mml:math></inline-formula> is a discount factor that prioritizes immediate rewards. We propose to utilize a DQN due to its ability to handle high-dimensional state spaces. 
The DQN comprises a neural network that approximates the optimal action-value function <inline-formula id="ieqn-68"><mml:math id="mml-ieqn-68"><mml:msup><mml:mi>Q</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. The network is trained iteratively using experience replay and target networks to stabilize learning. The experiences (<inline-formula id="ieqn-69"><mml:math id="mml-ieqn-69"><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>r</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:math></inline-formula>) are stored in a replay buffer, where <inline-formula id="ieqn-70"><mml:math id="mml-ieqn-70"><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:math></inline-formula> is the new state after taking action <inline-formula id="ieqn-71"><mml:math id="mml-ieqn-71"><mml:mi>a</mml:mi></mml:math></inline-formula> in state <inline-formula id="ieqn-72"><mml:math id="mml-ieqn-72"><mml:mi>s</mml:mi></mml:math></inline-formula> and receiving reward <inline-formula id="ieqn-73"><mml:math id="mml-ieqn-73"><mml:mi>r</mml:mi></mml:math></inline-formula>.</p>
<p>The reward at each time step is designed to reflect the improvement in secrecy rate. Therefore, if the action taken at time <inline-formula id="ieqn-74"><mml:math id="mml-ieqn-74"><mml:mi>t</mml:mi></mml:math></inline-formula> leads to an increase in the secrecy rate from <inline-formula id="ieqn-75"><mml:math id="mml-ieqn-75"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> to <inline-formula id="ieqn-76"><mml:math id="mml-ieqn-76"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>, the reward <inline-formula id="ieqn-77"><mml:math id="mml-ieqn-77"><mml:mi>r</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is given by the difference <inline-formula id="ieqn-78"><mml:math id="mml-ieqn-78"><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>R</mml:mi><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>. This incentivizes the agent to pursue actions that enhance security. The DQN agent is trained over a series of episodes. 
In each episode, the environment is initialized with a random state, and the agent iteratively selects actions based on an &#x03F5;-greedy policy to explore the action space and exploit the current best-known policy. The performance of the trained agent is evaluated by its ability to maintain a high secrecy rate over a separate validation set of channel realizations. The RL agent&#x2019;s learned policy is expected to adeptly allocate power among the SMs, accounting for the dynamic nature of the SG environment and the potential internal threat posed by an eavesdropper. By doing so, the algorithm ensures that the <italic>k</italic>-th SM, which has the highest risk of information leakage, maintains a secure channel. The proposed RL, with its adaptive power allocation strategy, promises a significant enhancement in the security of SG communications. By optimizing the power distribution in real time, the network&#x2019;s overall secrecy performance is bolstered, ensuring the integrity and confidentiality of critical control messages within the SG NAN. By integrating the IChOA into the training process, the RL agent is expected to learn more robust policies faster and with better convergence properties compared to standard algorithms. This can lead to improved performance in complex SG environments, where the agent must make decisions that enhance the security and efficiency of the network.</p>
<p>In the proposed IChOA-RL, the IChOA enhances the RL framework by optimizing key hyper-parameters such as weights, biases, learning rate, &#x03B5;-greedy parameters, and batch size. By leveraging advanced search mechanisms inspired by chimpanzee behavior, IChOA effectively balances exploration and exploitation within the hyper-parameter space. This process allows for the fine-tuning of weights and biases, leading to more accurate neural network mappings and improved decision-making in complex environments like SGs. Additionally, IChOA dynamically adjusts the learning rate to ensure efficient convergence, optimizes the &#x03B5;-greedy parameter to maintain a balanced exploration-exploitation trade-off, and selects an optimal batch size that balances computational efficiency with learning stability. The integration of IChOA into the RL framework results in a synergistic optimization of these parameters, considering their interdependencies to maximize overall performance. This holistic approach not only accelerates the convergence of the RL agent but also enhances the robustness of the learned policies, making the agent better equipped to handle the complexities and dynamism of SG communications. Ultimately, IChOA&#x2019;s optimization process significantly improves the efficiency and effectiveness of RL training, leading to more reliable and secure SG operations.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Simulation Results and Analysis</title>
<p>The simulation environment is configured to evaluate the secrecy rate performance of an SG NOMA communication system under various ML and RL algorithms. The setup includes an NG transmitting to several SMs in the presence of an eavesdropper. The number of SMs <inline-formula id="ieqn-79"><mml:math id="mml-ieqn-79"><mml:mi>K</mml:mi><mml:mo>=</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula> unless otherwise mentioned. The performance metrics are assessed against the power allocation coefficient (<inline-formula id="ieqn-80"><mml:math id="mml-ieqn-80"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> in dB) and the total transmission power (<inline-formula id="ieqn-81"><mml:math id="mml-ieqn-81"><mml:mi>P</mml:mi></mml:math></inline-formula> in mW). The algorithms compared with our proposed IChOA-RL in the simulation include traditional ML approaches like standard ChOA, GWO, LSTM, RNN, KNN, SVM, and standard RL model. The selection of comparison algorithms in this study was carefully made to ensure a comprehensive evaluation of the proposed IChOA-RL approach. These algorithms were chosen for their relevance to SG communications, their diversity in representing both traditional ML and advanced optimization techniques, and their proven track records in tasks such as classification, prediction, and optimization. The scope of potential values for each parameter during the simulation process has been extensive; however, due to pragmatic constraints, it is necessary to choose and exhibit a limited set of diverse parameter instances. <xref ref-type="table" rid="table-1">Table 1</xref> provides a snapshot of this selection, shedding light on the experimental process by emphasizing the specific parameter values that, in certain instances, either enhanced or diminished the performance of the algorithm.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Parameter setting of proposed methods</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left" />
<col align="left" />
</colgroup>
<thead>
<tr>
<th>Method</th>
<th>Parameter</th>
<th>Value</th>
</tr>
</thead>
<tbody>
<tr>
<td rowspan="4">IChOA</td>
<td>a</td>
<td>[&#x2212;1, 1]</td>
</tr>
<tr>
<td>f</td>
<td>Linearly from 2 to 0</td>
</tr>
<tr>
<td>Population size</td>
<td>100</td>
</tr>
<tr>
<td>Iteration</td>
<td>300</td>
</tr>
<tr>
<td rowspan="5">GWO</td>
<td>C</td>
<td>0.7</td>
</tr>
<tr>
<td>A</td>
<td>0.3</td>
</tr>
<tr>
<td>&#x03B1;</td>
<td>[0, 2]</td>
</tr>
<tr>
<td>Population size</td>
<td>100</td>
</tr>
<tr>
<td>Iteration</td>
<td>300</td>
</tr>
<tr>
<td rowspan="4">I-CSA</td>
<td>Flight length (FL)</td>
<td>2</td>
</tr>
<tr>
<td>Awareness probability (AP)</td>
<td>0.1</td>
</tr>
<tr>
<td>Population size</td>
<td>100</td>
</tr>
<tr>
<td>Iteration</td>
<td>300</td>
</tr>
<tr>
<td rowspan="5">KNN</td>
<td>Number of neighbors (k)</td>
<td>6</td>
</tr>
<tr>
<td>Distance metric</td>
<td>Euclidean distance</td>
</tr>
<tr>
<td>Weights</td>
<td>Uniform</td>
</tr>
<tr>
<td>Algorithm</td>
<td>Kd-tree</td>
</tr>
<tr>
<td>Leaf size</td>
<td>30</td>
</tr>
<tr>
<td rowspan="3">SVM</td>
<td>Kernel type</td>
<td>Linear and RBF</td>
</tr>
<tr>
<td>Gamma</td>
<td>0.003</td>
</tr>
<tr>
<td>Number of estimators</td>
<td>100</td>
</tr>
<tr>
<td rowspan="6">RNN</td>
<td>Number of hidden layers</td>
<td>8</td>
</tr>
<tr>
<td>Number of neurons in hidden layers</td>
<td>30</td>
</tr>
<tr>
<td>Learning rate</td>
<td>0.09</td>
</tr>
<tr>
<td>Dropout rate</td>
<td>0.2</td>
</tr>
<tr>
<td>Activation</td>
<td>Tanh and sigmoid</td>
</tr>
<tr>
<td>Optimizer</td>
<td>SGD</td>
</tr>
<tr>
<td rowspan="6">LSTM</td>
<td>Number of hidden layers</td>
<td>10</td>
</tr>
<tr>
<td>Number of neurons in hidden layers</td>
<td>35</td>
</tr>
<tr>
<td>Learning rate</td>
<td>0.10</td>
</tr>
<tr>
<td>Recurrent dropout rate</td>
<td>0.3</td>
</tr>
<tr>
<td>Activation</td>
<td>ReLU and Tanh</td>
</tr>
<tr>
<td>Optimizer</td>
<td>Adam</td>
</tr>
<tr>
<td rowspan="5">RL</td>
<td>Memory size</td>
<td>8000</td>
</tr>
<tr>
<td>Learning rate</td>
<td>0.0005</td>
</tr>
<tr>
<td><inline-formula id="ieqn-82"><mml:math id="mml-ieqn-82"><mml:mi mathvariant="bold-italic">&#x03B5;</mml:mi></mml:math></inline-formula>-greedy</td>
<td>0.4&#x2013;0.9</td>
</tr>
<tr>
<td>Batch size</td>
<td>256</td>
</tr>
<tr>
<td>Optimizer</td>
<td>Adam</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Calibrating parameters for ML algorithms is critical for achieving peak performance and demands careful consideration. It entails identifying the best combinations of parameter values for the algorithms to function efficiently. Establishing these optimal settings is crucial before proceeding with the performance evaluation of the algorithm. In this research, we adopt a systematic trial-and-error approach for parameter tuning, methodically adjusting each parameter separately and monitoring its impact while maintaining all other variables constant. For instance, in an algorithm with multiple parameters such as the number of hidden layers or the number of iterations, we analyze each parameter independently to assess its effect on the algorithm&#x2019;s performance. Although there are numerous possible variations for each parameter, practical constraints require us to select and demonstrate a limited range of different parameter scenarios. For our simulations, we utilized OpenAI Gym as the primary simulation environment for training the RL agents. Additionally, we integrated TensorFlow to implement the neural network components of the RL algorithm. To incorporate and evaluate the proposed evolutionary algorithm for updating the fully connected layers in this paper, we employed the Distributed Evolutionary Algorithms in Python (DEAP) library.</p>
<p><xref ref-type="fig" rid="fig-4">Fig. 4</xref> presents a detailed analysis of the secrecy rate&#x2019;s dependency on the power allocation coefficient <inline-formula id="ieqn-83"><mml:math id="mml-ieqn-83"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, as represented in decibels (dB), across various algorithmic strategies. In this figure, the proposed IChOA-RL approach consistently outperforms the other methods, showcasing a superior secrecy rate across the entire <inline-formula id="ieqn-84"><mml:math id="mml-ieqn-84"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> range. This suggests that the IChOA-RL&#x2019;s optimization process is effectively enhancing the RL agent&#x2019;s ability to allocate power in a way that maximizes the secrecy rate, regardless of the power coefficient&#x2019;s magnitude. The RL method alone shows notable improvement over the traditional ML techniques of RNN, LSTM, KNN, and SVM, which indicates the inherent advantage of adaptive learning in dynamic environments. However, KNN and SVM, despite being less dynamic, provide a baseline performance that, while not scaling as well with increased <inline-formula id="ieqn-85"><mml:math id="mml-ieqn-85"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, still contributes to our understanding of the impact of power allocation on secrecy. The graph also indicates a diminishing return on the secrecy rate as the power allocation coefficient increases, particularly for KNN and SVM, suggesting a threshold beyond which increasing power does not yield proportional secrecy gains. Overall, the performance trends in <xref ref-type="fig" rid="fig-4">Fig. 
4</xref> highlight the effectiveness of integrating advanced optimization techniques like IChOA with RL in enhancing secure communications in SG NOMA systems.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>Secrecy rate <italic>vs.</italic> the power allocation coefficient across various algorithms</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_56823-fig-4.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig-5">Fig. 5</xref> provides an insightful illustration of how the secrecy rate varies under the proposed IChOA-RL approach with the power allocation coefficient <inline-formula id="ieqn-86"><mml:math id="mml-ieqn-86"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> for different quantities of NOMA SMs, designated as <inline-formula id="ieqn-87"><mml:math id="mml-ieqn-87"><mml:mi>K</mml:mi></mml:math></inline-formula>. The curves represent four distinct scenarios, with <inline-formula id="ieqn-88"><mml:math id="mml-ieqn-88"><mml:mi>K</mml:mi></mml:math></inline-formula> taking on values of 6, 10, 14, and 18, respectively. As can be seen in this figure, when <inline-formula id="ieqn-89"><mml:math id="mml-ieqn-89"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> increases, a corresponding incremental rise in the secrecy rate is observed for each scenario, which aligns with the theoretical understanding that a higher power allocation coefficient enhances the signal&#x2019;s robustness against potential eavesdropping, thus improving secrecy.</p>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>Secrecy rate under the proposed IChOA-RL approach <italic>vs.</italic> the power allocation coefficient for different numbers of NOMA SMs</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_56823-fig-5.tif"/>
</fig>
<p>Notably, the rate at which the secrecy rate increases with <inline-formula id="ieqn-90"><mml:math id="mml-ieqn-90"><mml:msub><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is more pronounced as the number of SMs, K, grows. This indicates a multiplicative effect of NOMA&#x2019;s power domain exploitation when more SMs are present; essentially, the system can better differentiate between the intended signal and potential eavesdropping attempts. The scenario with K &#x003D; 18 SMs achieves the highest secrecy rate, suggesting that a larger network of SMs can utilize the intrinsic properties of NOMA more effectively, translating into superior secure communication capabilities. This could be due to the lesser complexity and reduced efficacy in channel use when fewer SMs are involved. These trends collectively highlight the effectiveness of NOMA in enhancing secure communications, particularly as the number of participating SMs in the network increases.</p>
<p><xref ref-type="fig" rid="fig-6">Fig. 6</xref> delves into the relationship between the total transmission power, denoted by <inline-formula id="ieqn-91"><mml:math id="mml-ieqn-91"><mml:mi>P</mml:mi></mml:math></inline-formula> and measured in milliwatts (mW), and the resulting secrecy rate, offering a comparative analysis across different algorithmic approaches. As the transmission power increases, all techniques exhibit an upward trend in secrecy rate, indicative of the direct correlation between transmission power and the ability to maintain secure communications. The IChOA-RL technique demonstrates a clear superiority, achieving higher secrecy rates at any given power level. This suggests that the IChOA&#x2019;s sophisticated optimization algorithm significantly refines the RL agent&#x2019;s power allocation decisions, leading to more effective secrecy enhancements.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>Secrecy rate <italic>vs.</italic> the NG&#x2019;s transmission power across various algorithms</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_56823-fig-6.tif"/>
</fig>
<p>Notably, the slope of the IChOA-RL curve is steeper than that of the other methods, especially in the mid-range of the power spectrum, indicating a more efficient conversion of increased power into higher secrecy rates. This efficiency is a critical advantage in real-world applications where power resources are limited and must be used judiciously. The RNN, LSTM, KNN, and SVM methods, while showing improvements with increased power, plateau sooner than the RL-based approaches, revealing the limitations of static models in leveraging additional power for secrecy. The GWO-RL, I-CSA-RL, ChOA-RL, and RL curves, while outperforming the traditional ML models, still lag behind the IChOA-RL, underscoring the impact of the improved optimization algorithm on RL&#x2019;s adaptability and performance. <xref ref-type="fig" rid="fig-6">Fig. 6</xref> finally illustrates not only the beneficial impact of higher transmission power on secrecy rates but also underscores the enhanced performance that can be achieved by a more powerful algorithm.</p>
<p>In this paper, the results were evaluated using three metrics: accuracy, <inline-formula id="ieqn-92"><mml:math id="mml-ieqn-92"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, and RMSE. The coefficient of determination quantifies the correlation between observed and predicted values, with values ranging from 0 to 1. A value of one signifies perfect correlation, whereas a value of zero indicates no correlation between the observed and predicted values. <xref ref-type="disp-formula" rid="eqn-19">Eqs. (19)</xref>&#x2013;<xref ref-type="disp-formula" rid="eqn-21">(21)</xref> provide the formulas for calculating <inline-formula id="ieqn-93"><mml:math id="mml-ieqn-93"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, <italic>RMSE</italic>, and <italic>Accuracy</italic>.
<disp-formula id="eqn-19"><label>(19)</label><mml:math id="mml-eqn-19" display="block"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mo>[</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mfrac><mml:mrow><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo>[</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mi>P</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>O</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mi>O</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:mo stretchy="false">)</mml:mo><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></disp-formula>
<disp-formula id="eqn-20"><label>(20)</label><mml:math id="mml-eqn-20" display="block"><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>O</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup></mml:math></disp-formula>
<disp-formula id="eqn-21"><label>(21)</label><mml:math id="mml-eqn-21" display="block"><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:math></disp-formula>where <inline-formula id="ieqn-94"><mml:math id="mml-ieqn-94"><mml:mi>N</mml:mi></mml:math></inline-formula> is the number of observations; <inline-formula id="ieqn-95"><mml:math id="mml-ieqn-95"><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the calculated parameter; <inline-formula id="ieqn-96"><mml:math id="mml-ieqn-96"><mml:msub><mml:mi>O</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the observed parameter; <inline-formula id="ieqn-97"><mml:math id="mml-ieqn-97"><mml:mover><mml:mi>P</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> is the average calculation parameter; <inline-formula id="ieqn-98"><mml:math id="mml-ieqn-98"><mml:mover><mml:mi>O</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> is the average observations parameter; <inline-formula id="ieqn-99"><mml:math id="mml-ieqn-99"><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the standard deviation of calculations; <inline-formula id="ieqn-100"><mml:math id="mml-ieqn-100"><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the standard deviation of observations; 
<inline-formula id="ieqn-101"><mml:math id="mml-ieqn-101"><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:math></inline-formula> &#x003D; true negative; <inline-formula id="ieqn-102"><mml:math id="mml-ieqn-102"><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:math></inline-formula> &#x003D; true positive; <inline-formula id="ieqn-103"><mml:math id="mml-ieqn-103"><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:math></inline-formula> &#x003D; false negative; and <inline-formula id="ieqn-104"><mml:math id="mml-ieqn-104"><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:math></inline-formula> &#x003D; false positive.</p>
<p><xref ref-type="table" rid="table-2">Table 2</xref> displays <inline-formula id="ieqn-105"><mml:math id="mml-ieqn-105"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, accuracy and runtime results for various evolutionary architectures designed to secure NOMA communication in SGs. The data clearly indicate that the IChOA-RL architecture outperforms the others in terms of both <inline-formula id="ieqn-106"><mml:math id="mml-ieqn-106"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> and accuracy, not just in the training set but also in the validation set. The IChOA-RL architecture achieved accuracy levels of 97.41% in the validation set and 98.86% in the training set. When it is stated that the IChOA-RL architecture has the highest <inline-formula id="ieqn-107"><mml:math id="mml-ieqn-107"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> value, it implies that this architecture most accurately captures and explains the variations in the problem. In other words, it fits the actual data points best and offers the most reliable predictive power among the architectures evaluated. The I-CSA-RL, GWO-RL, ChOA-RL, and RL models also recorded relatively strong performance. Conversely, the LSTM, RNN, SVM, and KNN algorithms demonstrated lower effectiveness. When comparing the runtime across the different methods, IChOA-RL demonstrates a significant advantage with a runtime of 724 s, making it the most efficient among the advanced algorithms evaluated. In contrast, I-CSA-RL, GWO-RL, and ChOA-RL require substantially more time, with runtimes of 985, 1024, and 896 s, respectively, indicating higher computational demands. Traditional methods like RNN, LSTM, SVM, and KNN show moderate runtimes ranging from 869 to 941 s, with the standard RL method being relatively faster at 659 s. 
This comparison highlights IChOA-RL&#x2019;s efficiency in delivering high performance without incurring excessive computational costs.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>The results of proposed architectures for secure NOMA communication in SGs</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left" />
<col align="left" />
<col align="left" />
<col align="left" />
<col align="left" />
<col align="left" />
</colgroup>
<thead>
<tr>
<th>Method</th>
<th colspan="2">Training dataset</th>
<th colspan="2">Validation dataset</th>
<th>Run time (s)</th>
</tr>
<tr>
<th/>
<th><inline-formula id="ieqn-108"><mml:math id="mml-ieqn-108"><mml:msup><mml:mi mathvariant="bold-italic">R</mml:mi><mml:mrow><mml:mrow><mml:mtext mathvariant="bold">2</mml:mtext></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula> (%)</th>
<th>Accuracy (%)</th>
<th><inline-formula id="ieqn-109"><mml:math id="mml-ieqn-109"><mml:msup><mml:mi mathvariant="bold-italic">R</mml:mi><mml:mrow><mml:mrow><mml:mtext mathvariant="bold">2</mml:mtext></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula> (%)</th>
<th>Accuracy (%)</th>
<th/>
</tr>
</thead>
<tbody>
<tr>
<td>IChOA-RL</td>
<td>96.27</td>
<td>98.86</td>
<td>95.77</td>
<td>97.41</td>
<td>724</td>
</tr>
<tr>
<td>I-CSA-RL</td>
<td>94.29</td>
<td>96.41</td>
<td>92.18</td>
<td>94.53</td>
<td>985</td>
</tr>
<tr>
<td>GWO-RL</td>
<td>93.48</td>
<td>95.84</td>
<td>91.52</td>
<td>93.28</td>
<td>1024</td>
</tr>
<tr>
<td>ChOA-RL</td>
<td>92.18</td>
<td>94.49</td>
<td>90.44</td>
<td>92.76</td>
<td>896</td>
</tr>
<tr>
<td>RL</td>
<td>90.92</td>
<td>92.51</td>
<td>89.36</td>
<td>90.43</td>
<td>659</td>
</tr>
<tr>
<td>RNN</td>
<td>88.74</td>
<td>91.09</td>
<td>86.48</td>
<td>89.18</td>
<td>874</td>
</tr>
<tr>
<td>LSTM</td>
<td>87.49</td>
<td>90.17</td>
<td>85.81</td>
<td>88.82</td>
<td>903</td>
</tr>
<tr>
<td>SVM</td>
<td>85.19</td>
<td>87.19</td>
<td>83.37</td>
<td>85.63</td>
<td>941</td>
</tr>
<tr>
<td>KNN</td>
<td>84.72</td>
<td>86.76</td>
<td>82.26</td>
<td>83.18</td>
<td>869</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>These results highlight the successful training of these architectures with meta-heuristic algorithms, which have effectively optimized their operational efficiency. Moreover, these architectures consistently demonstrate high accuracy across different hybrid RL structures in both testing and training datasets. This consistent performance suggests that the meta-heuristic algorithms used in the training processes have delivered reliable and uniform results across various models and datasets. The RMSE metric is used to evaluate the performance of the models presented in <xref ref-type="table" rid="table-3">Table 3</xref>. The results clearly show that the IChOA-RL surpasses its competitors, highlighting its effectiveness for the problem at hand. This model enhances the RL network by efficiently updating its weight and bias vectors through the integration of IChOA. The IChOA effectively tunes the parameters, enabling the RL network to more accurately detect and model the patterns and relationships in the data. According to <xref ref-type="fig" rid="fig-7">Fig. 7</xref>, the IChOA-RL converges more quickly than the others. By the 100th epoch, it almost reaches the lowest RMSE score, while the RMSE scores for the other architectures remain higher. Additionally, the IChOA-RL shows exceptional stability and swift convergence as epochs progress. The significant initial drop in RMSE for the model showcases a strong capacity for learning, and its sustained low error rate suggests it generalizes well across the dataset. In contrast, other models gradually improve but fail to achieve the low RMSE scores of the IChOA-RL. For example, SVM and KNN exhibit a slower reduction in RMSE. Other architectures like I-CSA-RL, GWO-RL, ChOA-RL, RL, RNN, and LSTM show moderate learning speeds. They manage to lower the RMSE to a commendable level, yet their convergence trajectories indicate they may need additional epochs to potentially equal the performance of IChOA-RL.</p>
<table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>The RMSE values of the proposed methods</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left" />
<col align="left" />
<col align="left" />
</colgroup>
<thead>
<tr>
<th rowspan="2">Method</th>
<th colspan="2">RMSE</th>
</tr>
<tr>
<th>Training dataset</th>
<th>Validation dataset</th>
</tr>
</thead>
<tbody>
<tr>
<td>IChOA-RL</td>
<td>0.08</td>
<td>0.95</td>
</tr>
<tr>
<td>I-CSA-RL</td>
<td>1.25</td>
<td>3.46</td>
</tr>
<tr>
<td>GWO-RL</td>
<td>2.98</td>
<td>4.24</td>
</tr>
<tr>
<td>ChOA-RL</td>
<td>3.57</td>
<td>5.69</td>
</tr>
<tr>
<td>RL</td>
<td>5.79</td>
<td>8.37</td>
</tr>
<tr>
<td>RNN</td>
<td>7.29</td>
<td>10.02</td>
</tr>
<tr>
<td>LSTM</td>
<td>9.15</td>
<td>11.73</td>
</tr>
<tr>
<td>SVM</td>
<td>15.24</td>
<td>20.18</td>
</tr>
<tr>
<td>KNN</td>
<td>18.34</td>
<td>21.73</td>
</tr>
</tbody>
</table>
</table-wrap><fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>The convergence curve of proposed methods</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="CMC_56823-fig-7.tif"/>
</fig>
<p>The computational complexity of the proposed RL technique primarily hinges on the intricacies of the RL algorithm itself and the optimization process facilitated by the IChOA. RL, particularly in environments modeled as MDPs, involves substantial computational effort due to the need to explore and learn optimal policies through interactions with the environment. The computational complexity of a DQN technique involves several key components, including the neural network architecture, the number of states, the number of actions, and the number of iterations required for convergence. Integrating the IChOA into this framework adds another layer of computational complexity. IChOA enhances the training process by optimizing the parameters of the RL network, leading to more robust policy learning. The complexity of IChOA, like other meta-heuristic algorithms, depends on the population size, the number of iterations, and the computational cost of evaluating the fitness function. In this paper, the total computational complexity (<inline-formula id="ieqn-110"><mml:math id="mml-ieqn-110"><mml:mi>C</mml:mi></mml:math></inline-formula>) of the proposed IChOA-RL model is calculated as <xref ref-type="disp-formula" rid="eqn-22">Eqs. (22)</xref>&#x2013;<xref ref-type="disp-formula" rid="eqn-24">(24)</xref>.
<disp-formula id="eqn-22"><label>(22)</label><mml:math id="mml-eqn-22" display="block"><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mi>L</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:mo>.</mml:mo><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>B</mml:mi><mml:mo>.</mml:mo><mml:mi>N</mml:mi><mml:mo>.</mml:mo><mml:mi>M</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></disp-formula>
<disp-formula id="eqn-23"><label>(23)</label><mml:math id="mml-eqn-23" display="block"><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mi>I</mml:mi><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>O</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>P</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>D</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>G</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>P</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>F</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>G</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>P</mml:mi><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mi>P</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>G</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>P</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>U</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></disp-formula></p>
<p><disp-formula id="eqn-24"><label>(24)</label><mml:math id="mml-eqn-24" display="block"><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mtext>H</mml:mtext></mml:mrow><mml:mo>.</mml:mo><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mi>L</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mrow><mml:mtext>H</mml:mtext></mml:mrow><mml:mo>.</mml:mo><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mi>I</mml:mi><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>O</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula>where <inline-formula id="ieqn-111"><mml:math id="mml-ieqn-111"><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mi>L</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the computational complexity of RL, <inline-formula id="ieqn-112"><mml:math id="mml-ieqn-112"><mml:msub><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mrow><mml:mi>I</mml:mi><mml:mi>C</mml:mi><mml:mi>h</mml:mi><mml:mi>O</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the computational complexity of IChOA, <inline-formula id="ieqn-113"><mml:math id="mml-ieqn-113"><mml:mi>T</mml:mi></mml:math></inline-formula> is the total number of iterations for convergence, <inline-formula id="ieqn-114"><mml:math id="mml-ieqn-114"><mml:mi>B</mml:mi></mml:math></inline-formula> is the mini-batch size used during training, <inline-formula id="ieqn-115"><mml:math id="mml-ieqn-115"><mml:mi>N</mml:mi></mml:math></inline-formula> is the total number of neurons in the neural network, <inline-formula id="ieqn-116"><mml:math id="mml-ieqn-116"><mml:mi>M</mml:mi></mml:math></inline-formula> is the average number of connections per neuron, <inline-formula id="ieqn-117"><mml:math id="mml-ieqn-117"><mml:mi>P</mml:mi></mml:math></inline-formula> is the population size, <inline-formula id="ieqn-118"><mml:math 
id="mml-ieqn-118"><mml:mi>G</mml:mi></mml:math></inline-formula> is the number of generations, <inline-formula id="ieqn-119"><mml:math id="mml-ieqn-119"><mml:mi>F</mml:mi></mml:math></inline-formula> is the complexity of the fitness function, <inline-formula id="ieqn-120"><mml:math id="mml-ieqn-120"><mml:mi>D</mml:mi></mml:math></inline-formula> is the number of dimensions, <inline-formula id="ieqn-121"><mml:math id="mml-ieqn-121"><mml:mi>U</mml:mi></mml:math></inline-formula> is the complexity of updating positions for one individual, and <inline-formula id="ieqn-122"><mml:math id="mml-ieqn-122"><mml:mrow><mml:mtext>H</mml:mtext></mml:mrow></mml:math></inline-formula> denotes the number of times the IChOA process is invoked within the DQL training process. The proposed IChOA-RL technique effectively addresses scalability challenges in smart grid implementations by integrating the IChOA for efficient training, enabling rapid convergence and adaptive policy updates in response to real-time data. This approach optimizes resource use, ensuring the method can operate within the constraints of existing smart grid infrastructure, from high-power servers to lower-power edge devices. Extensive simulations validate the method&#x2019;s ability to maintain high performance and adaptability across various network conditions and scales, demonstrating its robustness in managing large-scale smart grid networks. This scalability is essential for widespread deployment in complex smart grid environments, where efficient resource management and dynamic adaptability are crucial.</p>
</sec>
<sec id="s4">
<label>4</label>
<title>Conclusions</title>
<p>This paper has presented an in-depth exploration of a novel RL-based strategy for optimizing secrecy performance in an SG environment utilizing NOMA communication. By integrating IChOA to adjust the parameters of a fully connected neural network within the RL framework, we have demonstrated a significant enhancement in the secrecy rates across a range of operational scenarios. The IChOA-RL model was compared against eight other ML architectures. The IChOA-RL model achieved the highest accuracy, recording 97.41% on the validation datasets, making it the most effective approach. Our simulation results have conclusively shown that the IChOA-RL method outperforms traditional ML approaches such as RNN, LSTM, KNN, and SVM, as well as standard RL techniques. The robustness of IChOA-RL was particularly evident in its superior performance at higher power allocation coefficients and transmission power levels, showcasing its potential for practical implementation in real-world SG systems. The scalability of the NOMA communication system was also put to the test, giving insights into the relationship of the number of NOMA SMs with the utilization of the power domain for enhancing secrecy rates, as indicated by the higher slopes in the secrecy rate curves as the number of SMs increases. This finding underscores the importance of considering user density in designing secure SG communications. Furthermore, the study has contributed to the body of knowledge by highlighting the critical role of sophisticated optimization algorithms in RL. The application of IChOA to the training process of the neural network has been shown to significantly accelerate learning and convergence to optimal policies, ensuring efficient use of power resources while maintaining high levels of security.</p>
<p>Implementing the proposed IChOA-RL technique in real-world SG environments faces several challenges. The significant computational complexity and resource demands of the hybrid method require substantial processing power and memory, making real-time applications potentially costly and impractical. Scalability is also a concern, as the SG&#x2019;s vast network size demands efficient handling without performance degradation or exponential computational increases. Ensuring real-time adaptability and convergence is crucial, as the RL algorithm must quickly adapt to the dynamic conditions of the SG to maintain optimal performance. Integration with existing SG systems poses further challenges, requiring seamless incorporation without disrupting current operations while ensuring interoperability and regulatory compliance. While the paper addresses some of the practical challenges associated with implementing the IChOA-RL approach in SG environments, there are additional considerations that future research could explore in greater depth. These include real-time processing requirements, data quality issues, energy consumption, and security concerns. Addressing these challenges through innovative solutions and rigorous testing will be essential to realize the full benefits of the proposed method in enhancing the security and efficiency of SG communications.</p>
<p>Moreover, the IChOA-RL method may face difficulties in converging to a global optimum in highly complex or non-convex problem spaces, particularly if the initial conditions or parameter settings are not well-tuned. This is a common challenge shared with other evolutionary algorithms and advanced optimization methods like RL, RNN, LSTM, SVM, KNN, GWO, and I-CSA, which also require careful parameter tuning and can suffer from premature convergence or getting trapped in local optima. However, compared to these algorithms, IChOA-RL&#x2019;s advantage lies in its ability to adapt more dynamically to changing conditions, albeit at the cost of potentially higher computational demands. In summary, while the IChOA-RL method offers superior performance in terms of adaptability and scalability, its limitations include increased computational requirements and the need for careful tuning to ensure convergence, challenges that are also present in other state-of-the-art ML algorithms. Additionally, several unresolved questions regarding the IChOA and RL underscore the need for further investigation in this field. Future studies on the IChOA should delve into refining the algorithm&#x2019;s specific parameters and thresholds. Such research could involve detailed assessments of how parameter variations affect the algorithm&#x2019;s rate of convergence, the quality of solutions, and computational efficiency. Researchers might consider employing strategies like meta-heuristic parameter tuning or adaptive adjustments to dynamically optimize parameters during the process. Meanwhile, the development of RL models is likely to evolve towards overcoming the challenge posed by the scarcity of labeled data. This shift may lead to a stronger focus on semi-supervised and unsupervised learning methods. 
Future efforts could also examine the integration of IChOA into these learning frameworks to better leverage unlabeled data, thus enhancing the performance and generalization capabilities of RL models.</p>
</sec>
</body>
<back>
<glossary content-type="abbreviations" id="glossary-1">
<title>Abbreviations</title>
<def-list>
<def-item>
<term>AWGN</term>
<def>
<p>Additive White Gaussian Noise</p>
</def>
</def-item>
<def-item>
<term>ABC</term>
<def>
<p>Advanced Solana Blockchain</p>
</def>
</def-item>
<def-item>
<term>CSI</term>
<def>
<p>Channel State Information</p>
</def>
</def-item>
<def-item>
<term><inline-formula id="ieqn-2"><mml:math id="mml-ieqn-2"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></term>
<def>
<p>Coefficient of Determination</p>
</def>
</def-item>
<def-item>
<term>DL</term>
<def>
<p>Deep Learning</p>
</def>
</def-item>
<def-item>
<term>DNN</term>
<def>
<p>Deep Neural Network</p>
</def>
</def-item>
<def-item>
<term>DQN</term>
<def>
<p>Deep Q Network</p>
</def>
</def-item>
<def-item>
<term>DERs</term>
<def>
<p>Distributed Energy Resources</p>
</def>
</def-item>
<def-item>
<term>DEAP</term>
<def>
<p>Distributed Evolutionary Algorithms in Python</p>
</def>
</def-item>
<def-item>
<term>GWO</term>
<def>
<p>Grey Wolf Optimizer</p>
</def>
</def-item>
<def-item>
<term>IChOA</term>
<def>
<p>Improved Chimp Optimization Algorithm</p>
</def>
</def-item>
<def-item>
<term>I-CSA</term>
<def>
<p>Improved Crow Search Algorithm</p>
</def>
</def-item>
<def-item>
<term>IoT</term>
<def>
<p>Internet of Things</p>
</def>
</def-item>
<def-item>
<term>KNN</term>
<def>
<p>K-Nearest Neighbors</p>
</def>
</def-item>
<def-item>
<term>LSTM</term>
<def>
<p>Long Short-Term Memory</p>
</def>
</def-item>
<def-item>
<term>ML</term>
<def>
<p>Machine Learning</p>
</def>
</def-item>
<def-item>
<term>MDP</term>
<def>
<p>Markov Decision Process</p>
</def>
</def-item>
<def-item>
<term>MIMO</term>
<def>
<p>Multiple-Input Multiple-Output</p>
</def>
</def-item>
<def-item>
<term>NG</term>
<def>
<p>Neighborhood Gateway</p>
</def>
</def-item>
<def-item>
<term>NAN</term>
<def>
<p>Neighborhood Area Networks</p>
</def>
</def-item>
<def-item>
<term>NOMA</term>
<def>
<p>Non-Orthogonal Multiple Access</p>
</def>
</def-item>
<def-item>
<term>PLS</term>
<def>
<p>Physical Layer Security</p>
</def>
</def-item>
<def-item>
<term>PLC</term>
<def>
<p>Power Line Communication</p>
</def>
</def-item>
<def-item>
<term>RIS</term>
<def>
<p>Reconfigurable Intelligent Surfaces</p>
</def>
</def-item>
<def-item>
<term>RNN</term>
<def>
<p>Recurrent Neural Network</p>
</def>
</def-item>
<def-item>
<term>RL</term>
<def>
<p>Reinforcement Learning</p>
</def>
</def-item>
<def-item>
<term>RMSE</term>
<def>
<p>Root Mean Square Error</p>
</def>
</def-item>
<def-item>
<term>SOP</term>
<def>
<p>Secrecy Outage Probability</p>
</def>
</def-item>
<def-item>
<term>SNR</term>
<def>
<p>Signal-to-Noise Ratio</p>
</def>
</def-item>
<def-item>
<term>SMs</term>
<def>
<p>Smart Meters</p>
</def>
</def-item>
<def-item>
<term>SG</term>
<def>
<p>Smart Grid</p>
</def>
</def-item>
<def-item>
<term>BCWSN</term>
<def>
<p>Solana Blockchain-based Industrial Wireless Sensor Network</p>
</def>
</def-item>
<def-item>
<term>SIC</term>
<def>
<p>Successive Interference Cancellation</p>
</def>
</def-item>
<def-item>
<term>SVM</term>
<def>
<p>Support Vector Machine</p>
</def>
</def-item>
<def-item>
<term>WAN</term>
<def>
<p>Wide Area Network</p>
</def>
</def-item>
</def-list>
</glossary>
<ack>
<p>None.</p>
</ack>
<sec><title>Funding Statement</title>
<p>The work described in this paper has been developed within the project PRESECREL. We would like to acknowledge the financial support of the Ministerio de Ciencia e Investigaci&#x00F3;n (Spain), in relation to the Plan Estatal de Investigaci&#x00F3;n Cient&#x00ED;fica y T&#x00E9;cnica y de Innovaci&#x00F3;n 2017&#x2013;2020.</p>
</sec>
<sec><title>Author Contributions</title>
<p>The authors confirm their contribution to the paper as follows: study conception and design: Mehrdad Shoeibi, Mohammad Mehdi Sharifi Nevisi, Sarvenaz Sadat Khatami, Diego Mart&#x00ED;n; data collection: Mehrdad Shoeibi, Mohammad Mehdi Sharifi Nevisi, Sina Aghakhani; analysis and interpretation of results: Mehrdad Shoeibi, Sarvenaz Sadat Khatami, Sepehr Soltani; draft manuscript preparation: Mehrdad Shoeibi, Mohammad Mehdi Sharifi Nevisi, Diego Mart&#x00ED;n, Sina Aghakhani; supervision: Diego Mart&#x00ED;n. All authors reviewed the results and approved the final version of the manuscript.</p>
</sec>
<sec sec-type="data-availability"><title>Availability of Data and Materials</title>
<p>The data that support the findings of this study are available from the corresponding author, upon reasonable request.</p>
</sec>
<sec><title>Ethics Approval</title>
<p>Not applicable.</p>
</sec>
<sec sec-type="COI-statement"><title>Conflicts of Interest</title>
<p>The authors declare that they have no conflicts of interest to report regarding the present study.</p>
</sec>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>T.</given-names> <surname>Docquier</surname></string-name>, <string-name><given-names>Y. Q.</given-names> <surname>Song</surname></string-name>, <string-name><given-names>V.</given-names> <surname>Chevrier</surname></string-name>, <string-name><given-names>L.</given-names> <surname>Pontnau</surname></string-name>, and <string-name><given-names>A.</given-names> <surname>Ahmed-Nacer</surname></string-name></person-group>, &#x201C;<article-title>Performance evaluation methodologies for smart grid substation communication networks: A survey</article-title>,&#x201D; <source>Comput. Commun.</source>, vol. <volume>198</volume>, no. <issue>4</issue>, pp. <fpage>228</fpage>&#x2013;<lpage>246</lpage>, <year>2023</year>. doi: <pub-id pub-id-type="doi">10.1016/j.comcom.2022.11.005</pub-id>.</mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name>, <string-name><given-names>M. R.</given-names> <surname>Mosavi</surname></string-name>, <string-name><given-names>D.</given-names> <surname>Mart&#x00ED;n</surname></string-name>, and <string-name><given-names>S.</given-names> <surname>Aghapour</surname></string-name></person-group>, &#x201C;<article-title>An efficient authentication protocol for smart grid communication based on on-chip-error-correcting physical unclonable function</article-title>,&#x201D; <source>Sustain. Energy, Grids Netw.</source>, vol. <volume>36</volume>, <year>2023</year>, Art. no. 101228.</mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>S.</given-names> <surname>Li</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Wu</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Zhang</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Duan</surname></string-name>, and <string-name><given-names>J.</given-names> <surname>Xu</surname></string-name></person-group>, &#x201C;<article-title>Privacy transmission via joint active and passive beamforming optimization for RIS-Aided NOMA-IoMT networks</article-title>,&#x201D; <source>IEEE Trans. Consum. Electron.</source>, vol. <volume>70</volume>, no. <issue>1</issue>, pp. <fpage>2290</fpage>&#x2013;<lpage>2302</lpage>, <year>2024</year>. doi: <pub-id pub-id-type="doi">10.1109/TCE.2024.3349618</pub-id>.</mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>S.</given-names> <surname>Aghapour</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name>, <string-name><given-names>M. R.</given-names> <surname>Mosavi</surname></string-name>, and <string-name><given-names>D.</given-names> <surname>Mart&#x00ED;n</surname></string-name></person-group>, &#x201C;<article-title>An ultra-lightweight mutual authentication scheme for smart grid two-way communications</article-title>,&#x201D; <source>IEEE Access</source>, vol. <volume>9</volume>, pp. <fpage>74562</fpage>&#x2013;<lpage>74573</lpage>, <year>2021</year>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3080835</pub-id>.</mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Alonso</surname></string-name>, <string-name><given-names>H.</given-names> <surname>Amaris</surname></string-name>, <string-name><given-names>D.</given-names> <surname>Alcala</surname></string-name>, and <string-name><given-names>R. D. M.</given-names> <surname>Florez</surname></string-name></person-group>, &#x201C;<article-title>Smart sensors for smart grid reliability</article-title>,&#x201D; <source>Sensors</source>, vol. <volume>20</volume>, no. <issue>8</issue>, <year>2020</year>, Art. no. 2187. doi: <pub-id pub-id-type="doi">10.3390/s20082187</pub-id>; <pub-id pub-id-type="pmid">32294923</pub-id></mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>E. S.</given-names> <surname>Hassan</surname></string-name> and <string-name><given-names>A. S.</given-names> <surname>Elsafrawey</surname></string-name></person-group>, &#x201C;<article-title>Cooperative secrecy techniques for improving physical layer security in NOMA-based PLC networks</article-title>,&#x201D; <source>IETE Tech. Rev.</source>, vol. <volume>40</volume>, no. <issue>6</issue>, pp. <fpage>755</fpage>&#x2013;<lpage>766</lpage>, <year>2023</year>. doi: <pub-id pub-id-type="doi">10.1080/02564602.2023.2167741</pub-id>.</mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>S.</given-names> <surname>Miri</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name>, <string-name><given-names>H. S.</given-names> <surname>Shahhoseini</surname></string-name>, <string-name><given-names>M. R.</given-names> <surname>Mosavi</surname></string-name>, and <string-name><given-names>S.</given-names> <surname>Aghapour</surname></string-name></person-group>, &#x201C;<article-title>On the security of an ultra-lightweight and secure scheme for communications of smart metres and neighbourhood gateways by utilisation of an ARM Cortex-M microcontroller</article-title>,&#x201D; <source>IET Inf. Secur.</source>, vol. <volume>17</volume>, no. <issue>3</issue>, pp. <fpage>544</fpage>&#x2013;<lpage>551</lpage>, <year>2023</year>. doi: <pub-id pub-id-type="doi">10.1049/ise2.12108</pub-id>.</mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>S.</given-names> <surname>Mounchili</surname></string-name> and <string-name><given-names>S.</given-names> <surname>Hamouda</surname></string-name></person-group>, &#x201C;<article-title>Pairing distance resolution and power control for massive connectivity improvement in NOMA systems</article-title>,&#x201D; <source>IEEE Trans. Vehicular Technol.</source>, vol. <volume>69</volume>, no. <issue>4</issue>, pp. <fpage>4093</fpage>&#x2013;<lpage>4103</lpage>, <year>2020</year>. doi: <pub-id pub-id-type="doi">10.1109/TVT.2020.2975539</pub-id>.</mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>F. R.</given-names> <surname>Ghadi</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name>, and <string-name><given-names>D.</given-names> <surname>Mart&#x00ED;n</surname></string-name></person-group>, &#x201C;<article-title>Performance analysis of RIS/STAR-IOS-aided V2V NOMA/OMA communications over composite fading channels</article-title>,&#x201D; <source>IEEE Trans. Intell. Veh.</source>, vol. <volume>9</volume>, no. <issue>1</issue>, pp. <fpage>279</fpage>&#x2013;<lpage>286</lpage>, <year>2023</year>. doi: <pub-id pub-id-type="doi">10.1109/TIV.2023.3337898</pub-id>.</mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Zeng</surname></string-name>, <string-name><given-names>A.</given-names> <surname>Yadav</surname></string-name>, <string-name><given-names>O. A.</given-names> <surname>Dobre</surname></string-name>, and <string-name><given-names>H. V.</given-names> <surname>Poor</surname></string-name></person-group>, &#x201C;<article-title>Energy-efficient joint user-RB association and power allocation for uplink hybrid NOMA-OMA</article-title>,&#x201D; <source>IEEE Internet Things J.</source>, vol. <volume>6</volume>, no. <issue>3</issue>, pp. <fpage>5119</fpage>&#x2013;<lpage>5131</lpage>, <year>2019</year>. doi: <pub-id pub-id-type="doi">10.1109/JIOT.2019.2896946</pub-id>.</mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>X.</given-names> <surname>Tian</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>Power allocation scheme for maximizing spectral efficiency and energy efficiency tradeoff for uplink NOMA systems in B5G/6G</article-title>,&#x201D; <source>Phys. Commun.</source>, vol. <volume>43</volume>, <year>2020</year>, Art. no. 101227. doi: <pub-id pub-id-type="doi">10.1016/j.phycom.2020.101227</pub-id>.</mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>F.</given-names> <surname>Fang</surname></string-name>, <string-name><given-names>Z.</given-names> <surname>Ding</surname></string-name>, <string-name><given-names>W.</given-names> <surname>Liang</surname></string-name>, and <string-name><given-names>H.</given-names> <surname>Zhang</surname></string-name></person-group>, &#x201C;<article-title>Optimal energy efficient power allocation with user fairness for uplink MC-NOMA systems</article-title>,&#x201D; <source>IEEE Wirel. Commun. Lett.</source>, vol. <volume>8</volume>, no. <issue>4</issue>, pp. <fpage>1133</fpage>&#x2013;<lpage>1136</lpage>, <year>2019</year>. doi: <pub-id pub-id-type="doi">10.1109/LWC.2019.2908912</pub-id>.</mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Ghiasi</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>A comprehensive review of cyber-attacks and defense mechanisms for improving security in smart grid energy systems: Past, present and future</article-title>,&#x201D; <source>Elect. Power Syst. Res.</source>, vol. <volume>215</volume>, <year>2023</year>, Art. no. 108975. doi: <pub-id pub-id-type="doi">10.1016/j.epsr.2022.108975</pub-id>.</mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Z.</given-names> <surname>Ali</surname></string-name>, <string-name><given-names>G. A. S.</given-names> <surname>Sidhu</surname></string-name>, <string-name><given-names>F.</given-names> <surname>Gao</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Jiang</surname></string-name>, and <string-name><given-names>X.</given-names> <surname>Wang</surname></string-name></person-group>, &#x201C;<article-title>Deep learning based power optimizing for NOMA based relay aided D2D transmissions</article-title>,&#x201D; <source>IEEE Trans. Cogn. Commun. Netw.</source>, vol. <volume>7</volume>, no. <issue>3</issue>, pp. <fpage>917</fpage>&#x2013;<lpage>928</lpage>, <year>2021</year>. doi: <pub-id pub-id-type="doi">10.1109/TCCN.2021.3049475</pub-id>.</mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name> and <string-name><given-names>M. R.</given-names> <surname>Mosavi</surname></string-name></person-group>, &#x201C;<article-title>A lightweight mutual authentication for smart grid neighborhood area network communications based on physically unclonable function</article-title>,&#x201D; <source>IEEE Syst. J.</source>, vol. <volume>14</volume>, no. <issue>3</issue>, pp. <fpage>4535</fpage>&#x2013;<lpage>4544</lpage>, <year>2020</year>. doi: <pub-id pub-id-type="doi">10.1109/JSYST.2019.2963235</pub-id>.</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>L.</given-names> <surname>Yang</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>Secrecy performance analysis of RIS-aided wireless communication systems</article-title>,&#x201D; <source>IEEE Trans. Vehicular Technol.</source>, vol. <volume>69</volume>, no. <issue>10</issue>, pp. <fpage>12296</fpage>&#x2013;<lpage>12300</lpage>, <year>2020</year>. doi: <pub-id pub-id-type="doi">10.1109/TVT.2020.3007521</pub-id>.</mixed-citation></ref>
<ref id="ref-17"><label>[17]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>D.</given-names> <surname>Wang</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>Uplink secrecy performance of RIS-based RF/FSO three-dimension heterogeneous networks</article-title>,&#x201D; <source>IEEE Trans. Wirel. Commun.</source>, vol. <volume>23</volume>, no. <issue>3</issue>, pp. <fpage>1798</fpage>&#x2013;<lpage>1809</lpage>, <year>2023</year>. doi: <pub-id pub-id-type="doi">10.1109/TWC.2023.3292073</pub-id>.</mixed-citation></ref>
<ref id="ref-18"><label>[18]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name>, <string-name><given-names>Z.</given-names> <surname>Yan</surname></string-name>, and <string-name><given-names>R.</given-names> <surname>J&#x00E4;ntti</surname></string-name></person-group>, &#x201C;<article-title>Secrecy performance analysis of RIS-aided smart grid communications</article-title>,&#x201D; <source>IEEE Trans. Ind. Inform.</source>, vol. <volume>20</volume>, no. <issue>3</issue>, pp. <fpage>5415</fpage>&#x2013;<lpage>5427</lpage>, <year>2024</year>. doi: <pub-id pub-id-type="doi">10.1109/TII.2023.3333842</pub-id>.</mixed-citation></ref>
<ref id="ref-19"><label>[19]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>H.</given-names> <surname>Lei</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>Secrecy outage performance analysis for uplink CR-NOMA systems with hybrid SIC</article-title>,&#x201D; <source>IEEE Internet Things J.</source>, vol. <volume>10</volume>, no. <issue>15</issue>, pp. <fpage>13181</fpage>&#x2013;<lpage>13195</lpage>, <year>2023</year>. doi: <pub-id pub-id-type="doi">10.1109/JIOT.2023.3261308</pub-id>.</mixed-citation></ref>
<ref id="ref-20"><label>[20]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>F. R.</given-names> <surname>Ghadi</surname></string-name>, <string-name><given-names>F. J.</given-names> <surname>L&#x00F3;pez-Mart&#x00ED;nez</surname></string-name>, <string-name><given-names>W. P.</given-names> <surname>Zhu</surname></string-name>, and <string-name><given-names>J. M.</given-names> <surname>Gorce</surname></string-name></person-group>, &#x201C;<article-title>The impact of side information on physical layer security under correlated fading channels</article-title>,&#x201D; <source>IEEE Trans. Inf. Forensics Secur.</source>, vol. <volume>17</volume>, pp. <fpage>3626</fpage>&#x2013;<lpage>3636</lpage>, <year>2022</year>. doi: <pub-id pub-id-type="doi">10.1109/TIFS.2022.3212198</pub-id>.</mixed-citation></ref>
<ref id="ref-21"><label>[21]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Pei</surname></string-name>, <string-name><given-names>X.</given-names> <surname>Yue</surname></string-name>, <string-name><given-names>C.</given-names> <surname>Huang</surname></string-name>, and <string-name><given-names>Z.</given-names> <surname>Lu</surname></string-name></person-group>, &#x201C;<article-title>Secrecy performance analysis of RIS assisted ambient backscatter communication networks</article-title>,&#x201D; <source>IEEE Trans. Green Commun. Netw.</source>, vol. <volume>8</volume>, no. <issue>3</issue>, p. <fpage>1</fpage>, <year>2024</year>. doi: <pub-id pub-id-type="doi">10.1109/TGCN.2024.3365692</pub-id>.</mixed-citation></ref>
<ref id="ref-22"><label>[22]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name>, <string-name><given-names>F.</given-names> <surname>Rostami Ghadi</surname></string-name>, <string-name><given-names>R.</given-names> <surname>J&#x00E4;ntti</surname></string-name>, and <string-name><given-names>Z.</given-names> <surname>Yan</surname></string-name></person-group>, &#x201C;<article-title>Secrecy performance analysis of backscatter communications with side information</article-title>,&#x201D; <source>Sensors</source>, vol. <volume>23</volume>, no. <issue>20</issue>, <year>2023</year>, Art. no. 8358. doi: <pub-id pub-id-type="doi">10.3390/s23208358</pub-id>; <pub-id pub-id-type="pmid">37896453</pub-id></mixed-citation></ref>
<ref id="ref-23"><label>[23]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>V. L.</given-names> <surname>Nguyen</surname></string-name>, <string-name><given-names>D. B.</given-names> <surname>Ha</surname></string-name>, <string-name><given-names>V. T.</given-names> <surname>Truong</surname></string-name>, <string-name><given-names>D. D.</given-names> <surname>Tran</surname></string-name>, and <string-name><given-names>S.</given-names> <surname>Chatzinotas</surname></string-name></person-group>, &#x201C;<article-title>Secure communication for RF energy harvesting NOMA relaying networks with relay-user selection scheme and optimization</article-title>,&#x201D; <source>Mob. Netw. Appl.</source>, vol. <volume>27</volume>, no. <issue>4</issue>, pp. <fpage>1719</fpage>&#x2013;<lpage>1733</lpage>, <year>2022</year>. doi: <pub-id pub-id-type="doi">10.1007/s11036-022-01929-3</pub-id>.</mixed-citation></ref>
<ref id="ref-24"><label>[24]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>C. E.</given-names> <surname>Garcia</surname></string-name>, <string-name><given-names>M. R.</given-names> <surname>Camana</surname></string-name>, and <string-name><given-names>I.</given-names> <surname>Koo</surname></string-name></person-group>, &#x201C;<article-title>Ensemble learning aided QPSO-based framework for secrecy energy efficiency in FD CR-NOMA systems</article-title>,&#x201D; <source>IEEE Trans. Green Commun. Netw.</source>, vol. <volume>7</volume>, no. <issue>2</issue>, pp. <fpage>649</fpage>&#x2013;<lpage>667</lpage>, <year>2022</year>. doi: <pub-id pub-id-type="doi">10.1109/TGCN.2022.3219111</pub-id>.</mixed-citation></ref>
<ref id="ref-25"><label>[25]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>S.</given-names> <surname>Thakur</surname></string-name> and <string-name><given-names>S.</given-names> <surname>Thakor</surname></string-name></person-group>, &#x201C;<article-title>Secrecy performance optimization of SWIPT wireless networks in partial secrecy regime</article-title>,&#x201D; <source>IEEE Trans. Green Commun. Netw.</source>, <year>2024</year>. doi: <pub-id pub-id-type="doi">10.1109/TGCN.2024.3464241</pub-id>.</mixed-citation></ref>
<ref id="ref-26"><label>[26]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Z.</given-names> <surname>Chu</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>Secrecy rate optimization for intelligent reflecting surface assisted MIMO system</article-title>,&#x201D; <source>IEEE Trans. Inf. Forensics Secur.</source>, vol. <volume>16</volume>, pp. <fpage>1655</fpage>&#x2013;<lpage>1669</lpage>, <year>2020</year>. doi: <pub-id pub-id-type="doi">10.1109/TIFS.2020.3038994</pub-id>.</mixed-citation></ref>
<ref id="ref-27"><label>[27]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>G.</given-names> <surname>Sharma</surname></string-name>, <string-name><given-names>N.</given-names> <surname>Pandey</surname></string-name>, <string-name><given-names>A.</given-names> <surname>Singh</surname></string-name>, and <string-name><given-names>R. K.</given-names> <surname>Mallik</surname></string-name></person-group>, &#x201C;<article-title>Secrecy optimization for diffusion-based molecular timing channels</article-title>,&#x201D; <source>IEEE Trans. Mol., Biol. Multi-Scale Commun.</source>, vol. <volume>7</volume>, no. <issue>4</issue>, pp. <fpage>253</fpage>&#x2013;<lpage>261</lpage>, <year>2021</year>. doi: <pub-id pub-id-type="doi">10.1109/TMBMC.2021.3054907</pub-id>.</mixed-citation></ref>
<ref id="ref-28"><label>[28]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>W.</given-names> <surname>Yu</surname></string-name>, <string-name><given-names>A.</given-names> <surname>Chorti</surname></string-name>, <string-name><given-names>L.</given-names> <surname>Musavian</surname></string-name>, <string-name><given-names>H. V.</given-names> <surname>Poor</surname></string-name>, and <string-name><given-names>Q.</given-names> <surname>Ni</surname></string-name></person-group>, &#x201C;<article-title>Effective secrecy rate for a downlink NOMA network</article-title>,&#x201D; <source>IEEE Trans. Wirel. Commun.</source>, vol. <volume>18</volume>, no. <issue>12</issue>, pp. <fpage>5673</fpage>&#x2013;<lpage>5690</lpage>, <year>2019</year>. doi: <pub-id pub-id-type="doi">10.1109/TWC.2019.2938515</pub-id>.</mixed-citation></ref>
<ref id="ref-29"><label>[29]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Camponogara</surname></string-name>, <string-name><given-names>H. V.</given-names> <surname>Poor</surname></string-name>, and <string-name><given-names>M. V.</given-names> <surname>Ribeiro</surname></string-name></person-group>, &#x201C;<article-title>The complete and incomplete low-bit-rate hybrid PLC/wireless channel models: Physical layer security analyses</article-title>,&#x201D; <source>IEEE Internet Things J.</source>, vol. <volume>6</volume>, no. <issue>2</issue>, pp. <fpage>2760</fpage>&#x2013;<lpage>2769</lpage>, <year>2019</year>. doi: <pub-id pub-id-type="doi">10.1109/JIOT.2018.2874377</pub-id>.</mixed-citation></ref>
<ref id="ref-30"><label>[30]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Salem</surname></string-name>, <string-name><given-names>K. M.</given-names> <surname>Rabie</surname></string-name>, <string-name><given-names>K. A.</given-names> <surname>Hamdi</surname></string-name>, <string-name><given-names>E.</given-names> <surname>Alsusa</surname></string-name>, and <string-name><given-names>A. M.</given-names> <surname>Tonello</surname></string-name></person-group>, &#x201C;<article-title>Physical layer security of cooperative relaying power-line communication systems</article-title>,&#x201D; in <conf-name>2016 Int. Symp. Power Line Commun. App. (ISPLC)</conf-name>, <publisher-loc>Bottrop, Germany</publisher-loc>, <year>2016</year>, pp. <fpage>185</fpage>&#x2013;<lpage>189</lpage>.</mixed-citation></ref>
<ref id="ref-31"><label>[31]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Salem</surname></string-name>, <string-name><given-names>K. A.</given-names> <surname>Hamdi</surname></string-name>, and <string-name><given-names>E.</given-names> <surname>Alsusa</surname></string-name></person-group>, &#x201C;<article-title>Physical layer security over correlated log-normal cooperative power line communication channels</article-title>,&#x201D; <source>IEEE Access</source>, vol. <volume>5</volume>, pp. <fpage>13909</fpage>&#x2013;<lpage>13921</lpage>, <year>2017</year>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2017.2729784</pub-id>.</mixed-citation></ref>
<ref id="ref-32"><label>[32]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>K. O.</given-names> <surname>Odeyemi</surname></string-name>, <string-name><given-names>P. A.</given-names> <surname>Owolawi</surname></string-name>, and <string-name><given-names>O. O.</given-names> <surname>Olakanmi</surname></string-name></person-group>, &#x201C;<article-title>Secure transmission in smart grid dynamic wide area network by exploiting full-duplex jamming scheme</article-title>,&#x201D; <source>Trans. Emerg. Telecomm. Technol.</source>, vol. <volume>34</volume>, no. <issue>1</issue>, <year>2023</year>, Art. no. e4657. doi: <pub-id pub-id-type="doi">10.1002/ett.4657</pub-id>.</mixed-citation></ref>
<ref id="ref-33"><label>[33]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Atallah</surname></string-name>, <string-name><given-names>M. S.</given-names> <surname>Alam</surname></string-name>, and <string-name><given-names>G.</given-names> <surname>Kaddoum</surname></string-name></person-group>, &#x201C;<article-title>Secrecy analysis of wireless sensor network in smart grid with destination assisted jamming</article-title>,&#x201D; <source>IET Commun.</source>, vol. <volume>13</volume>, no. <issue>12</issue>, pp. <fpage>1748</fpage>&#x2013;<lpage>1752</lpage>, <year>2019</year>. doi: <pub-id pub-id-type="doi">10.1049/iet-com.2018.5344</pub-id>.</mixed-citation></ref>
<ref id="ref-34"><label>[34]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>El-Shafie</surname></string-name>, <string-name><given-names>D.</given-names> <surname>Niyato</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Hamila</surname></string-name>, and <string-name><given-names>N.</given-names> <surname>Al-Dhahir</surname></string-name></person-group>, &#x201C;<article-title>Impact of the wireless network&#x2019;s PHY security and reliability on demand-side management cost in the smart grid</article-title>,&#x201D; <source>IEEE Access</source>, vol. <volume>5</volume>, pp. <fpage>5678</fpage>&#x2013;<lpage>5689</lpage>, <year>2017</year>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2017.2695520</pub-id>.</mixed-citation></ref>
<ref id="ref-35"><label>[35]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><given-names>V.</given-names> <surname>Mohan</surname></string-name>, <string-name><given-names>A.</given-names> <surname>Mathur</surname></string-name>, <string-name><given-names>V.</given-names> <surname>Aishwarya</surname></string-name>, and <string-name><given-names>S.</given-names> <surname>Bhargav</surname></string-name></person-group>, &#x201C;<article-title>Secrecy analysis of PLC system with channel gain and impulsive noise</article-title>,&#x201D; in <conf-name>2019 IEEE 90th Veh. Tech. Conf. (VTC2019-Fall)</conf-name>, <publisher-loc>Honolulu, HI, USA</publisher-loc>, <year>2019</year>, pp. <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</mixed-citation></ref>
<ref id="ref-36"><label>[36]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Faheem</surname></string-name>, <string-name><given-names>H.</given-names> <surname>Kuusniemi</surname></string-name>, <string-name><given-names>B.</given-names> <surname>Eltahawy</surname></string-name>, <string-name><given-names>M. S.</given-names> <surname>Bhutta</surname></string-name>, and <string-name><given-names>B.</given-names> <surname>Raza</surname></string-name></person-group>, &#x201C;<article-title>A lightweight smart contracts framework for blockchain-based secure communication in smart grid applications</article-title>,&#x201D; <source>IET Gen., Trans. Distrib.</source>, vol. <volume>18</volume>, no. <issue>3</issue>, pp. <fpage>625</fpage>&#x2013;<lpage>638</lpage>, <year>2024</year>. doi: <pub-id pub-id-type="doi">10.1049/gtd2.13103</pub-id>.</mixed-citation></ref>
<ref id="ref-37"><label>[37]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>N.</given-names> <surname>Mensi</surname></string-name>, <string-name><given-names>D. B.</given-names> <surname>Rawat</surname></string-name>, and <string-name><given-names>E.</given-names> <surname>Balti</surname></string-name></person-group>, &#x201C;<article-title>Gradient ascent algorithm for enhancing secrecy rate in wireless communications for smart grid</article-title>,&#x201D; <source>IEEE Trans. Green Commun. Netw.</source>, vol. <volume>6</volume>, no. <issue>1</issue>, pp. <fpage>107</fpage>&#x2013;<lpage>116</lpage>, <year>2021</year>. doi: <pub-id pub-id-type="doi">10.1109/TGCN.2021.3093821</pub-id>.</mixed-citation></ref>
<ref id="ref-38"><label>[38]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>J.</given-names> <surname>Wang</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Khishe</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name>, and <string-name><given-names>H.</given-names> <surname>Mohammadi</surname></string-name></person-group>, &#x201C;<article-title>Binary chimp optimization algorithm (BChOA): A new binary meta-heuristic for solving optimization problems</article-title>,&#x201D; <source>Cognit. Comput.</source>, vol. <volume>13</volume>, no. <issue>5</issue>, pp. <fpage>1297</fpage>&#x2013;<lpage>1316</lpage>, <year>2021</year>. doi: <pub-id pub-id-type="doi">10.1007/s12559-021-09933-7</pub-id>.</mixed-citation></ref>
<ref id="ref-39"><label>[39]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Aljebreen</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>Binary chimp optimization algorithm with ML based intrusion detection for secure IoT-assisted wireless sensor networks</article-title>,&#x201D; <source>Sensors</source>, vol. <volume>23</volume>, no. <issue>8</issue>, <year>2023</year>, Art. no. 4073. doi: <pub-id pub-id-type="doi">10.3390/s23084073</pub-id>; <pub-id pub-id-type="pmid">37112414</pub-id></mixed-citation></ref>
<ref id="ref-40"><label>[40]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Kaveh</surname></string-name> and <string-name><given-names>M. S.</given-names> <surname>Mesgari</surname></string-name></person-group>, &#x201C;<article-title>Application of meta-heuristic algorithms for training neural networks and deep learning architectures: A comprehensive review</article-title>,&#x201D; <source>Neural Process. Lett.</source>, vol. <volume>55</volume>, no. <issue>4</issue>, pp. <fpage>4519</fpage>&#x2013;<lpage>4622</lpage>, <year>2022</year>. doi: <pub-id pub-id-type="doi">10.1007/s11063-022-11055-6</pub-id>; <pub-id pub-id-type="pmid">36339645</pub-id></mixed-citation></ref>
<ref id="ref-41"><label>[41]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A. K.</given-names> <surname>Shakya</surname></string-name>, <string-name><given-names>G.</given-names> <surname>Pillai</surname></string-name>, and <string-name><given-names>S.</given-names> <surname>Chakrabarty</surname></string-name></person-group>, &#x201C;<article-title>Reinforcement learning algorithms: A brief survey</article-title>,&#x201D; <source>Expert. Syst. Appl.</source>, vol. <volume>231</volume>, no. <issue>7</issue>, <year>2023</year>, Art. no. 120495. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2023.120495</pub-id>.</mixed-citation></ref>
</ref-list>
</back></article>