<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMC</journal-id>
<journal-id journal-id-type="nlm-ta">CMC</journal-id>
<journal-id journal-id-type="publisher-id">CMC</journal-id>
<journal-title-group>
<journal-title>Computers, Materials &#x0026; Continua</journal-title>
</journal-title-group>
<issn pub-type="epub">1546-2226</issn>
<issn pub-type="ppub">1546-2218</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">20066</article-id>
<article-id pub-id-type="doi">10.32604/cmc.2022.020066</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Optimal Deep Reinforcement Learning for Intrusion Detection in UAVs</article-title>
<alt-title alt-title-type="left-running-head">Optimal Deep Reinforcement Learning for Intrusion Detection in UAVs</alt-title>
<alt-title alt-title-type="right-running-head">Optimal Deep Reinforcement Learning for Intrusion Detection in UAVs</alt-title>
</title-group>
<contrib-group content-type="authors">
<contrib id="author-1" contrib-type="author">
<name name-style="western">
<surname>Praveena</surname>
<given-names>V.</given-names>
</name>
<xref ref-type="aff" rid="aff-1">1</xref>
</contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western">
<surname>Vijayaraj</surname>
<given-names>A.</given-names>
</name>
<xref ref-type="aff" rid="aff-2">2</xref>
</contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western">
<surname>Chinnasamy</surname>
<given-names>P.</given-names>
</name>
<xref ref-type="aff" rid="aff-3">3</xref>
</contrib>
<contrib id="author-4" contrib-type="author" corresp="yes">
<name name-style="western">
<surname>Ali</surname>
<given-names>Ihsan</given-names>
</name>
<xref ref-type="aff" rid="aff-4">4</xref><email>ihsanali@ieee.org</email>
</contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western">
<surname>Alroobaea</surname>
<given-names>Roobaea</given-names>
</name>
<xref ref-type="aff" rid="aff-5">5</xref>
</contrib>
<contrib id="author-6" contrib-type="author">
<name name-style="western">
<surname>Alyahyan</surname>
<given-names>Saleh Yahya</given-names>
</name>
<xref ref-type="aff" rid="aff-6">6</xref>
</contrib>
<contrib id="author-7" contrib-type="author">
<name name-style="western">
<surname>Raza</surname>
<given-names>Muhammad Ahsan</given-names>
</name>
<xref ref-type="aff" rid="aff-7">7</xref>
</contrib>
<aff id="aff-1"><label>1</label><institution>Department of Computer Science and Engineering, Dr. N. G. P Institute of Technology</institution>, <addr-line>Coimbatore, 641048</addr-line>, <country>India</country></aff>
<aff id="aff-2"><label>2</label><institution>Department of Information Technology, Vignan&#x2019;s Foundation for Science, Technology &#x0026; Research</institution>, <addr-line>Guntur, 522213</addr-line>, <country>India</country></aff>
<aff id="aff-3"><label>3</label><institution>Department of Information Technology, Sri Shakthi Institute of Engineering and Technology</institution>, <addr-line>Coimbatore, 641062</addr-line>, <country>India</country></aff>
<aff id="aff-4"><label>4</label><institution>Department of Computer System and Technology, Faculty of Computer Science and Information Technology, University of Malaya</institution>, <addr-line>Kuala Lumpur, 50603</addr-line>, <country>Malaysia</country></aff>
<aff id="aff-5"><label>5</label><institution>Department of Computer Science, College of Computers and Information Technology, Taif University</institution>, <addr-line>Taif, 21944</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-6"><label>6</label><institution>Department of Computer Science, Community College in Dwadmi, Shaqra University</institution>, <addr-line>11961</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-7"><label>7</label><institution>Department of Information Technology, Bahauddin Zakariya University</institution>, <addr-line>Multan, 60000</addr-line>, <country>Pakistan</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Author: Ihsan Ali. Email: <email>ihsanali@ieee.org</email></corresp>
</author-notes>
<pub-date pub-type="epub" date-type="pub" iso-8601-date="2021-09-13"><day>13</day><month>9</month><year>2021</year></pub-date>
<volume>70</volume>
<issue>2</issue>
<fpage>2639</fpage>
<lpage>2653</lpage>
<history>
<date date-type="received"><day>07</day><month>5</month><year>2021</year>
</date>
<date date-type="accepted"><day>15</day><month>6</month><year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2022 Praveena et al.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Praveena et al.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMC_20066.pdf"></self-uri>
<abstract>
<p>In recent years, progressive developments have been observed in recent technologies and the production cost has been continuously decreasing. In such scenario, Internet of Things (IoT) network which is comprised of a set of Unmanned Aerial Vehicles (UAV), has received more attention from civilian to military applications. But network security poses a serious challenge to UAV networks whereas the intrusion detection system (IDS) is found to be an effective process to secure the UAV networks. Classical IDSs are not adequate to handle the latest computer networks that possess maximum bandwidth and data traffic. In order to improve the detection performance and reduce the false alarms generated by IDS, several researchers have employed Machine Learning (ML) and Deep Learning (DL) algorithms to address the intrusion detection problem. In this view, the current research article presents a deep reinforcement learning technique, optimized by Black Widow Optimization (DRL-BWO) algorithm, for UAV networks. In addition, DRL involves an improved reinforcement learning-based Deep Belief Network (DBN) for intrusion detection. For parameter optimization of DRL technique, BWO algorithm is applied. It helps in improving the intrusion detection performance of UAV networks. An extensive set of experimental analysis was performed to highlight the supremacy of the proposed model. From the simulation values, it is evident that the proposed method is appropriate as it attained high precision, recall, F-measure, and accuracy values such as 0.985, 0.993, 0.988, and 0.989 respectively.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Intrusion detection</kwd>
<kwd>UAV networks</kwd>
<kwd>reinforcement learning</kwd>
<kwd>deep learning</kwd>
<kwd>parameter optimization</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>The exponential developments in the fields of cloud computing and artificial intelligence technologies have drastically improved the design of Internet of Things (IoT) technologies. Various smart devices have the ability to generate and receive massive quantities of data through communication and interconnectivity. The familiarity of IoT technologies and the smartness of gadgets have provided a comfy lifestyle to its users. Nevertheless, the utilization of latest technologies and intelligent gadgets paved the way for new security and privacy issues [<xref ref-type="bibr" rid="ref-1">1</xref>]. IoT network is deemed to be a significant target for hackers since the IoT devices gather and archive massive quantities of private data of clients. At this moment, the protection of user&#x2019;s private data and security are highly essential [<xref ref-type="bibr" rid="ref-2">2</xref>]. Due to the progression of technologies and constant reduction in production cost, there is an increasing penetration of IoT network that comprises of Unmanned Aerial Vehicles (UAVs) right starting from manufacturing &#x0026; production to daily lives of the people in terms of border surveillance. At present, UAVs are extensively utilized in movie and television shooting, smart farming, climate observation, forest fire recognition, disaster management, etc. But UAVs brought distinct accessibilities to life and increased productivity whereas the network security issues are occurring in parallel [<xref ref-type="bibr" rid="ref-3">3</xref>,<xref ref-type="bibr" rid="ref-4">4</xref>].</p>
<p>If a number of UAVs collaboratively carry out their functions, it is essential to design a data connection channel among them so as to develop a mobile self-organized network of UAVs. UAV system allows the real-time distribution of data using mobile networks that do not require transmission from ground station. It increases the survival and combating abilities of UAV network in an efficient manner. Since UAV network is a subtype of Mobile Ad hoc Network (MANET), a typical attack in MANET affects the UAV network too. Due to the existence of different network accessing techniques and openness of networks, UAV networks suffer from unavoidable security challenges. The defensive operations of classical network security technologies are frequently passive and it is challenging to resist the network attacks using such unstable technologies.</p>
<p>When it comes to dynamic defensive network security technologies, Intrusion Detection System (IDS) has limitations of conventional security technologies. However, intrusion detection systems have received considerable interest on client end though quite a few difficulties have to be overcome in real-time applications. Classical IDS usually experiences inadequate efficiency and ineffectiveness, particularly when handling recent computer networks that work with high bandwidth and enormous data traffic. Since the attacks are highly complicated, automatic, and distributed, the classical IDS does not fulfil the requirements of recent network security challenges. This scenario demands that the Detection Rate (DR) of IDSs be enhanced and their false alarm frequency be diminished. To this end, various studies have presented Machine Learning (ML) techniques in the domain of intrusion detection.</p>
<p>Evolutionary Algorithms (EAs) are simulated from the concept of natural evolution yet with few variations theoretically. The variations exist because of the nature that every algorithm follows a different creature or that the behavior of individuals grow and create new solutions. In EA, a population of possible solutions attempts at survival based on the validation of fitness in a particular platform. They arbitrarily accomplish the optimization procedure. The initial population of optimization process is generated arbitrarily and it alters the fixed functions over a particular number of iterations or rounds. Different processes of reproduction, migration, and solution designing over optimization makes each one different from another.</p>
<p>Many population-oriented algorithms do not follow any structure. It depicts an identical feature during searching process based on exploration and exploitation stages which forms the major characteristics of algorithm. To obtain the maximum performance, metaheuristic techniques maintain a tradeoff between exploration and exploitation levels in searching area. The exploration level provides a chance to observe different &#x0026; significant regions in a search space and generate new solutions to escape from the local optima issue. The exploitation stage denotes the convergence ability of the algorithm and obtains predictable solutions during exploration process. So, a better outcome between the exploration and exploitation stages ensures the avoidance of local optimization problems and achievement of better convergence speed. Besides, proper management of these two phases can reach the global optima.</p>
<p>Though several metaheuristic algorithms are available in the literature, the current research article utilizes Black Widow Optimization (BWO) algorithm. This BWO algorithm is framed on the basis of interesting nature of Black Widow (BW) spiders. It encompasses an important process of cannibalism. In this process, spiders without fitness are discarded from the region which results in earlier convergence. It significantly varies from other population-based optimization algorithms. BWO algorithm provides effective outcomes on exploitation and exploration levels. Besides, it provides rapid convergence and eliminates the local optimum issue. It is also noted that the BWO algorithm has the ability to investigate maximum search space to reach the global best solutions. Therefore, BWO algorithm can be utilized to solve the hyperparameter optimization problem.</p>
<p>The current research work presents a Deep Reinforcement Learning technique optimized by Black Widow Optimization (DRL-BWO) algorithm for UAV networks. In addition, DRL involves improved reinforcement learning-based Deep Belief Network (DBN) for intrusion detection. For parameter optimization of DRL technique, BWO algorithm is applied which helps in improving the intrusion detection performance among UAV networks. An extensive set of experimental analyses was conducted to highlight the supremacy of the proposed model. The contribution of this research article is summarized herewith.
<list list-type="bullet">
<list-item>
<p>DRL-BWO algorithm is proposed for intrusion detection in UAV networks</p></list-item>
<list-item>
<p>An improved reinforcement learning-based DBN model is employed with softmax layer for the detection of intrusions in UAV networks</p></list-item>
<list-item>
<p>For hyperparameter optimization of reinforced DBN model, BWO algorithm is utilized through which DR is enhanced</p></list-item>
<list-item>
<p>The intrusion detection performance of the DRL-BWO algorithm was validated against NSL-KDD Cup dataset</p></list-item>
</list></p>
<p>Rest of the sections in the paper are arranged as follows. Section 2 offers the works related to the domain and Section 3 introduces the presented DRL-BWO technique for UAV networks. Further, Section 4 validates the performance of the proposed DRL-BWO algorithm. Finally, Section 5 concludes the work.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related Works</title>
<p>Shah et al. [<xref ref-type="bibr" rid="ref-5">5</xref>] studied the efficiency of two open source IDSs named Snort as well as Suricata. The outcome illustrated that an improved DR can be achieved when utilizing optimum Support Vector Machine (SVM) and firefly (FF) techniques. Kabir et al. [<xref ref-type="bibr" rid="ref-6">6</xref>] developed a novel model based on least squares SVM (LS-SVM) for IDS. Wang et al. [<xref ref-type="bibr" rid="ref-7">7</xref>] designed an IDS using SVM through feature augmentation process. With the conversion of logarithmic marginal density ratio for generating actual features, the novel efficient change features are achieved that considerably enhances the detection capability of a technique. Ahmed et al. [<xref ref-type="bibr" rid="ref-8">8</xref>] introduced a learning technique for IDS with the help of Neural Network (NN) that has better output in terms of convergence rate and learning time.</p>
<p>Hu et al. [<xref ref-type="bibr" rid="ref-9">9</xref>] developed a distributed IDS in which a local parameterized detection method is built for every individual node using an online Adaboost technique. Ma et al. [<xref ref-type="bibr" rid="ref-10">10</xref>] introduced a new method named SCDNN that associates Spectral Clustering (SC) and Deep NN (DNN) techniques. The simulation outcome depicted that the SCDNN classification model works efficiently over Back Propagation Neural Network (BPNN) and SVM models. But Deep Learning (DL) models have been extensively applied, thanks to their better efficiency in big data analytics and their feasibility in resolving intrusion detection issues of enormous, high-dimensional, and non-linear data. With the construction of a nonlinear network along with multiple hidden layers, the low dimension features that are simple to categorize the data could be attained, and intrusion detection performance can be enhanced.</p>
<p>Hinton et al. [<xref ref-type="bibr" rid="ref-11">11</xref>] introduced a DL technique named Deep Belief Network (DBN) which sparked widespread interest among researchers. This technique converts high dimension and non-linear data features into abstracts that are appropriate for pattern classification using layer-wise feature extraction. Qu et al. [<xref ref-type="bibr" rid="ref-12">12</xref>] presented an IDS using DBN which is efficiently enhanced to find the abnormalities. Liang et al. [<xref ref-type="bibr" rid="ref-13">13</xref>] developed an IDS depending on DBN and ELM (Extreme Learning Machine) that increases the DR and effectiveness of algorithmic operations. The number of hidden layer node counts can be optimally found using Particle Swarm Optimization (PSO) technique.</p>
<p>In the literature [<xref ref-type="bibr" rid="ref-14">14</xref>], the researchers developed an effective technique to find indoor and open-air three-dimensional (3D) areas of nodes by determining the signal strength. The mathematical formulation is performed based on path-loss model and decision tree. The study conducted earlier [<xref ref-type="bibr" rid="ref-15">15</xref>] presented an IDS which is designed to be included in network gateway so that it can determine the attacks and filter the over length packets. IDS is executed based on integer optimization issue by the minimization of false alarm probability, while it maintains the missed detection probability below a desired level.</p>
<p>Few intelligent search algorithms proposed so far are Simulated Annealing (SA), ant colony algorithm, Genetic Algorithm (GA), and PSO. In ant colony technique, the time taken for resolution is high and it is prone to premature convergence. The original outcome of SA technique is considerably influenced by the variables such as global optimization and computation efficiency. On the other hand, Bayesian optimization techniques are frequently employed in the optimization of hyper parameters. Though it has the benefit of low number of iterations, it falls easily into local optima. Therefore, BWO algorithm can be utilized in resolving the hyperparameter optimization problem.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>The Proposed DRL-BWO Based Intrusion Detection in UAV Networks</title>
<p>The working procedure involved in the proposed DRL-BWO algorithm is shown in <?A3B2 "fig1",5,"anchor"?><xref ref-type="fig" rid="fig-1">Fig. 1</xref>. From the figure, it is apparent that the networking data, fed as input, undergoes preprocessing to remove the unwanted data and transform it into a compatible format. Then, DBN model is applied to determine the existence of intrusions in UAV networks. Finally, BWO algorithm is employed to determine the optimal hyperparameter values involved in the presented model.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>The working process of DRL-BWO model</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="CMC_20066-fig-1.png"/>
</fig>
<sec id="s3_1">
<label>3.1</label>
<title>Reinforcement Learning</title>
<p>DRL is integrated with RL and DNN. This combination allows the RL agents to improve if the provided conditions are separately explored. When an RL agent is learning a task, the situation provides the required data to the agent based on its performance, <italic>i.e.</italic>, either best or worst. With this data, the agent should separately perform the task which results in the optimal execution of task purposes. The purposes can be illustrated by reward function which allocates the numerical value for all the performed actions in the provided state. Besides, an agent attains a novel state in the event of an action accomplishment. So, the agent connects the states with performances so as to maximize <inline-formula id="ieqn-1"><mml:math id="mml-ieqn-1"><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x03B3;</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msup><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mo>&#x2026;</mml:mo></mml:math></inline-formula>. Here <inline-formula id="ieqn-2"><mml:math id="mml-ieqn-2"><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> refers to the achieved reward in <inline-formula id="ieqn-3"><mml:math id="mml-ieqn-3"><mml:mi>i</mml:mi></mml:math></inline-formula>-th episode and <inline-formula id="ieqn-4"><mml:math id="mml-ieqn-4"><mml:mi>&#x03B3;</mml:mi></mml:math></inline-formula> implies a discount factor measure which refers to how effective the future performances will be.</p>
<p>An essential part of RL model is Markov Decision Process (MDP) in which the upcoming moves as well as rewards are distributed only with the present state and chosen performance. Thus, when the Markovian asset exists in a state, such states possess all the data required for dynamic tasks. For example, chess is a common instance of Markovian asset. During this game, the historical decisions have no say in decision making process for further proceedings [<xref ref-type="bibr" rid="ref-16">16</xref>]. Each data is already explained in present sharing of pieces over the board. Conversely, when the present state is identified, the earlier transitions which directed the agent to that condition develop in an unrelated manner in terms of decision-making process.</p>
<p>MDP is appropriately determined by 4-tuple <inline-formula id="ieqn-5"><mml:math id="mml-ieqn-5"><mml:mo>&#x003C;</mml:mo><mml:mi>S</mml:mi><mml:mo>,</mml:mo></mml:math></inline-formula> <inline-formula id="ieqn-6"><mml:math id="mml-ieqn-6"><mml:mi>A</mml:mi></mml:math></inline-formula>, <inline-formula id="ieqn-7"><mml:math id="mml-ieqn-7"><mml:mi>&#x03B4;</mml:mi></mml:math></inline-formula>, <inline-formula id="ieqn-8"><mml:math id="mml-ieqn-8"><mml:mi>r</mml:mi><mml:mo>&#x003E;</mml:mo></mml:math></inline-formula> where:</p>
<p><inline-formula id="ieqn-9"><mml:math id="mml-ieqn-9"><mml:mi>S</mml:mi></mml:math></inline-formula> refers to limited group of system states, <inline-formula id="ieqn-10"><mml:math id="mml-ieqn-10"><mml:mi>s</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>S</mml:mi></mml:math></inline-formula>;</p>
<p><inline-formula id="ieqn-11"><mml:math id="mml-ieqn-11"><mml:mi>A</mml:mi></mml:math></inline-formula> denotes a limited group of actions, <inline-formula id="ieqn-12"><mml:math id="mml-ieqn-12"><mml:mi>a</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>A</mml:mi></mml:math></inline-formula>, and <inline-formula id="ieqn-13"><mml:math id="mml-ieqn-13"><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mi>A</mml:mi></mml:math></inline-formula> indicate a limited group of actions that are accessible in <inline-formula id="ieqn-14"><mml:math id="mml-ieqn-14"><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mi>S</mml:mi></mml:math></inline-formula> at time <inline-formula id="ieqn-15"><mml:math id="mml-ieqn-15"><mml:mi>t</mml:mi></mml:math></inline-formula>;</p>
<p><inline-formula id="ieqn-16"><mml:math id="mml-ieqn-16"><mml:mi>&#x03B4;</mml:mi></mml:math></inline-formula> implies the transition process <inline-formula id="ieqn-17"><mml:math id="mml-ieqn-17"><mml:mi>&#x03B4;</mml:mi><mml:mo>&#x003A;</mml:mo><mml:mi>S</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>A</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo><mml:mi>S</mml:mi></mml:math></inline-formula>;</p>
<p><inline-formula id="ieqn-18"><mml:math id="mml-ieqn-18"><mml:mi>r</mml:mi></mml:math></inline-formula> signifies a direct reward (or reinforcement) function <inline-formula id="ieqn-19"><mml:math id="mml-ieqn-19"><mml:mi>r</mml:mi></mml:math></inline-formula>: <inline-formula id="ieqn-20"><mml:math id="mml-ieqn-20"><mml:mi>S</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>A</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mo>.</mml:mo></mml:math></inline-formula></p>
<p>During the timestep <inline-formula id="ieqn-21"><mml:math id="mml-ieqn-21"><mml:mi>t</mml:mi></mml:math></inline-formula>, an agent observes the present state <inline-formula id="ieqn-22"><mml:math id="mml-ieqn-22"><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mi>S</mml:mi></mml:math></inline-formula> and selects their action <italic>i.e.</italic>, <inline-formula id="ieqn-23"><mml:math id="mml-ieqn-23"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mi>A</mml:mi></mml:math></inline-formula> to be performed. A situation provides a reward <inline-formula id="ieqn-24"><mml:math id="mml-ieqn-24"><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>r</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and the agent goes to a state <inline-formula id="ieqn-25"><mml:math id="mml-ieqn-25"><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03B4;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. 
The functions <inline-formula id="ieqn-26"><mml:math id="mml-ieqn-26"><mml:mi>r</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-27"><mml:math id="mml-ieqn-27"><mml:mi>&#x03B4;</mml:mi></mml:math></inline-formula> are decided based on the present state <inline-formula id="ieqn-28"><mml:math id="mml-ieqn-28"><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and action <inline-formula id="ieqn-29"><mml:math id="mml-ieqn-29"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> only. Hence, it cannot be considered as a memory procedure. In addition, an agent goes to study the policy &#x03C0; : <inline-formula id="ieqn-30"><mml:math id="mml-ieqn-30"><mml:mi>S</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo><mml:mi>A</mml:mi></mml:math></inline-formula> since the state <inline-formula id="ieqn-31"><mml:math id="mml-ieqn-31"><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> produces a maximum value or discounted reward as represented in <xref ref-type="disp-formula" rid="eqn-1">Eq. (1)</xref>:</p>
<p><disp-formula id="eqn-1">
<label>(1)</label>
<mml:math id="mml-eqn-1" display="block"><mml:msup><mml:mrow><mml:mi mathvariant="script">Q</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03C0;</mml:mi></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x03B3;</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msup><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">&#x221E;</mml:mi></mml:mrow></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mi mathvariant="normal">&#x03B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-32"><mml:math id="mml-ieqn-32"><mml:msup><mml:mrow><mml:mi mathvariant="script">Q</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03C0;</mml:mi></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is the action-value function succeeding the procedure <inline-formula id="ieqn-33"><mml:math id="mml-ieqn-33"><mml:mi>&#x03C0;</mml:mi></mml:math></inline-formula> (<italic>e.g</italic>., selecting action <inline-formula id="ieqn-34"><mml:math id="mml-ieqn-34"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>) in a provided state <inline-formula id="ieqn-35"><mml:math id="mml-ieqn-35"><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>.</p>
<p>The end purpose of RL is to determine the best procedure <inline-formula id="ieqn-36"><mml:math id="mml-ieqn-36"><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> that maps the states to actions in order to maximize the future reward <inline-formula id="ieqn-37"><mml:math id="mml-ieqn-37"><mml:mo stretchy="false">(</mml:mo><mml:mi>r</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> over time <inline-formula id="ieqn-38"><mml:math id="mml-ieqn-38"><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> by rate of discount <inline-formula id="ieqn-39"><mml:math id="mml-ieqn-39"><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03B3;</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>1</mml:mn><mml:mo>]</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>, as illustrated in <xref ref-type="disp-formula" rid="eqn-2">Eq. (2)</xref>. 
In this formula, <inline-formula id="ieqn-40"><mml:math id="mml-ieqn-40"><mml:msub><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03C0;</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mspace width="thinmathspace" /><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> indicates the expected value provided that the agent follows a procedure <inline-formula id="ieqn-41"><mml:math id="mml-ieqn-41"><mml:mi>&#x03C0;</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-42"><mml:math id="mml-ieqn-42"><mml:msup><mml:mrow><mml:mi mathvariant="script">Q</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> denotes the optimal action-value function. In DRL, a function approximator, implemented by a DNN, permits an agent to work with high-dimensional spaces such as the pixels of an image:</p>
<p><disp-formula id="eqn-2">
<label>(2)</label>
<mml:math id="mml-eqn-2" display="block"><mml:msup><mml:mrow><mml:mi mathvariant="script">Q</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03C0;</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x03B3;</mml:mi><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msup><mml:mi>&#x03B3;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mo>&#x22EF;</mml:mo><mml:mo>&#x2223;</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>&#x03C0;</mml:mi><mml:mo>]</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math>
</disp-formula></p>
<p>Interactive feedback is the model which enhances the learning time of RL agent. During this technique, an external trainer is directed at an agent&#x2019;s apprenticeship to explore further promising regions at initial learning phase. External trainer is an agent who might be a human, robot, or other artificial agent too.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>DRL Based DBN Model</title>
<p>Restricted Boltzmann Machine (RBM) refers to a stochastic physics-oriented computation technique which can learn the intrinsic patterns of data distribution scenarios. It can be defined as a bipartite graph in which the data comprises of a visible input called layer <inline-formula id="ieqn-44"><mml:math id="mml-ieqn-44"><mml:mi>v</mml:mi></mml:math></inline-formula>, whereas hidden n-dimensional vector <inline-formula id="ieqn-45"><mml:math id="mml-ieqn-45"><mml:mi>h</mml:mi></mml:math></inline-formula> contains a number of hidden neurons. The training process of the model aims at minimizing the energy of model, as defined below.</p>
<p><disp-formula id="eqn-3">
<label>(3)</label>
<mml:math id="mml-eqn-3" display="block"><mml:mi>E</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>v</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>h</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-46"><mml:math id="mml-ieqn-46"><mml:mi>m</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-47"><mml:math id="mml-ieqn-47"><mml:mi>n</mml:mi></mml:math></inline-formula> refer to the dimensions of the visible and hidden layers, <inline-formula id="ieqn-48"><mml:math id="mml-ieqn-48"><mml:mi>b</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-49"><mml:math id="mml-ieqn-49"><mml:mi>c</mml:mi></mml:math></inline-formula> are the corresponding bias vectors, <inline-formula id="ieqn-50"><mml:math id="mml-ieqn-50"><mml:mi>W</mml:mi></mml:math></inline-formula> signifies the weight matrix that links these two layers, and <inline-formula id="ieqn-51"><mml:math id="mml-ieqn-51"><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the connection weight between visible unit <inline-formula id="ieqn-52"><mml:math id="mml-ieqn-52"><mml:mi>i</mml:mi></mml:math></inline-formula> and hidden unit <inline-formula id="ieqn-53"><mml:math id="mml-ieqn-53"><mml:mi>j</mml:mi></mml:math></inline-formula>. Note that the RBM is &#x2018;restricted&#x2019; in the sense that no connections are permitted among the neurons of the same layer. In principle, training can be resolved through the determination of the joint probability of visible and hidden neurons. However, this method is intractable as it needs a partition function computation, <italic>i.e</italic>., a computation over all possible configurations of the network. So, Hinton presented Contrastive Divergence (CD) to estimate the conditional probabilities of the visible and hidden neurons using Gibbs sampling, a Markov Chain Monte Carlo (MCMC) method. Hence, the conditional probabilities of the hidden and visible units are determined as follows:</p>
<p><disp-formula id="eqn-4">
<label>(4)</label>
<mml:math id="mml-eqn-4" display="block"><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2223;</mml:mo><mml:mi>v</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math>
</disp-formula></p>
<p>and</p>
<p><disp-formula id="eqn-5">
<label>(5)</label>
<mml:math id="mml-eqn-5" display="block"><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2223;</mml:mo><mml:mi>h</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-54"><mml:math id="mml-ieqn-54"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> denotes the logistic sigmoid function.</p>
<p>Being a graph-based generative model, DBN is comprised of visible and hidden layers that are linked <italic>via</italic> weight matrices. Further, no connections exist among the neurons within the same layer. Practically, the DBN holds a collection of stacked RBMs in which the hidden layer of each RBM greedily feeds the visible layer of the succeeding RBM. At last, a Softmax layer is used and the weights are tuned with the help of BWO algorithm. Here, <inline-formula id="ieqn-55"><mml:math id="mml-ieqn-55"><mml:msup><mml:mrow><mml:mi mathvariant="normal">W</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:math></inline-formula> <inline-formula id="ieqn-56"><mml:math id="mml-ieqn-56"><mml:mi>l</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>L</mml:mi><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>, denotes the weight matrix at layer <inline-formula id="ieqn-57"><mml:math id="mml-ieqn-57"><mml:mi>l</mml:mi></mml:math></inline-formula>, where <inline-formula id="ieqn-58"><mml:math id="mml-ieqn-58"><mml:mi>L</mml:mi></mml:math></inline-formula> represents the hidden layer count. In addition, <inline-formula id="ieqn-59"><mml:math id="mml-ieqn-59"><mml:mi>v</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-60"><mml:math id="mml-ieqn-60"><mml:msup><mml:mi>h</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula> denote the visible and hidden layer respectively. <?A3B2 "fig2",5,"anchor"?><xref ref-type="fig" rid="fig-2">Fig. 2</xref> shows the architecture of RL-DBN.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>The architecture of RL-DBN</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="CMC_20066-fig-2.png"/>
</fig>
<p>Here, the author presents a residual reinforcement layer-by-layer in DBN model called RL-DBN. It is a hybridization of sigmoid belief networks and binary RBMs [<xref ref-type="bibr" rid="ref-17">17</xref>], and it is important to highlight a few &#x2018;tricks&#x2019; that utilize the information provided layer by layer. DBN is treated as a hybridized network which models the prior distribution of data in a layer-by-layer manner so as to improve the lower bound from model distribution. It is inspired to utilize the data learned at all the stacks of RBM for reinforcement since the greedy layer-wise pretraining uses the activation of the latent binary variables as the input of the subsequent visible layer. The activation function is represented in <xref ref-type="disp-formula" rid="eqn-4">Eq. (4)</xref>, and the corresponding preactivation vector, <inline-formula id="ieqn-61"><mml:math id="mml-ieqn-61"><mml:msup><mml:mi>a</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula>, is given below:</p>
<p><disp-formula id="eqn-6">
<label>(6)</label>
<mml:math id="mml-eqn-6" display="block"><mml:msubsup><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mi>c</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-62"><mml:math id="mml-ieqn-62"><mml:msubsup><mml:mi>c</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> denotes the bias of hidden layer <inline-formula id="ieqn-63"><mml:math id="mml-ieqn-63"><mml:mi>l</mml:mi><mml:mo>,</mml:mo></mml:math></inline-formula> <inline-formula id="ieqn-64"><mml:math id="mml-ieqn-64"><mml:mi>m</mml:mi></mml:math></inline-formula> is the number of units in the previous layer, <inline-formula id="ieqn-65"><mml:math id="mml-ieqn-65"><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> denotes the weights of layer <inline-formula id="ieqn-66"><mml:math id="mml-ieqn-66"><mml:mi>l</mml:mi></mml:math></inline-formula>, and <inline-formula id="ieqn-67"><mml:math id="mml-ieqn-67"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> indicates the input data from layer <inline-formula id="ieqn-68"><mml:math id="mml-ieqn-68"><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, where <inline-formula id="ieqn-69"><mml:math id="mml-ieqn-69"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>0</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:math></inline-formula></p>
<p>Consequently, it is possible to utilize the &#x2018;reinforcement preactivation&#x2019; vector, represented by <inline-formula><mml:math><mml:msup><mml:mrow><mml:mover><mml:mi>a</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula>, from layer <inline-formula id="ieqn-70"><mml:math id="mml-ieqn-70"><mml:mi>l</mml:mi><mml:mo>,</mml:mo></mml:math></inline-formula> <inline-formula id="ieqn-71"><mml:math id="mml-ieqn-71"><mml:mi mathvariant="normal">&#x2200;</mml:mi><mml:mspace width="thinmathspace" /><mml:mi>l</mml:mi><mml:mo>&#x003E;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>. Since the classical RBM post-activation output lies in the <inline-formula id="ieqn-73"><mml:math id="mml-ieqn-73"><mml:mrow><mml:mo>[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>1</mml:mn><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> interval, it is essential to restrict the reinforcement term of the presented method to the same interval, as given herewith.</p>
<p><disp-formula id="eqn-7">
<label>(7)</label>
<mml:math id="mml-eqn-7" display="block"><mml:msup><mml:mrow><mml:mover><mml:mi>a</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>&#x03B4;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msup><mml:mi>a</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mi>&#x03B4;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-75"><mml:math id="mml-ieqn-75"><mml:mi>&#x03B4;</mml:mi></mml:math></inline-formula> denotes the rectifier function and <inline-formula id="ieqn-76"><mml:math id="mml-ieqn-76"><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:math></inline-formula> returns the maximal value of the &#x03B4; output vector, which is used to normalize it. Afterwards, the new input data, which aggregates the information at layer <inline-formula id="ieqn-77"><mml:math id="mml-ieqn-77"><mml:mi>l</mml:mi></mml:math></inline-formula>, is obtained by adding the values achieved in <xref ref-type="disp-formula" rid="eqn-7">Eq. (7)</xref> to the post-activation, <italic>i.e</italic>., the application of <inline-formula id="ieqn-78"><mml:math id="mml-ieqn-78"><mml:mi>&#x03C3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>a</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>:</p>
<p><disp-formula id="eqn-8">
<label>(8)</label>
<mml:math id="mml-eqn-8" display="block"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>a</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-79"><mml:math id="mml-ieqn-79"><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> denotes the new input data for layer <inline-formula id="ieqn-80"><mml:math id="mml-ieqn-80"><mml:mi>l</mml:mi><mml:mo>,</mml:mo></mml:math></inline-formula> <inline-formula id="ieqn-81"><mml:math id="mml-ieqn-81"><mml:mi mathvariant="normal">&#x2200;</mml:mi><mml:mi>l</mml:mi><mml:mo>&#x003E;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> whereas the normalized and vectorized forms are provided herewith.</p>
<p><disp-formula id="eqn-9">
<label>(9)</label>
<mml:math id="mml-eqn-9" display="block"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mfrac><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:math>
</disp-formula></p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Parameter Optimization Process</title>
<p>In order to tune the parameters of DBN model, BWO algorithm is employed. BWO algorithm imitates the life cycle of BW spiders. In general, female BW spiders construct the web at night time and deposit pheromones at certain places in the web to attract the male BW spiders for mating [<xref ref-type="bibr" rid="ref-18">18</xref>]. Male BW spiders get attracted towards the pheromone and enter the web. The female BW spider consumes the male BW spider during or after the mating process. After mating, female BWs lay their egg sacs on the web. After 11 days of incubation, young spiderlings hatch out of the eggs and engage in sibling cannibalism. They stay in the web where they hatched for a short duration, and in some cases they are consumed by their mother too. The surviving young spiders are treated as fit spiders. <?A3B2 "fig3",5,"anchor"?><xref ref-type="fig" rid="fig-3">Fig. 3</xref> illustrates the lifecycle of a black widow. BWO algorithm follows the concept discussed herewith.</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>The lifecycle of black widow</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="CMC_20066-fig-3.png"/>
</fig>
<p>BWO algorithm begins with an arbitrary initial BW spider population which includes both male and female BW spiders to generate offspring for subsequent life cycle. The initial population of BW spiders is defined in <xref ref-type="disp-formula" rid="eqn-10">Eq. (10)</xref>.</p>
<p><disp-formula id="eqn-10">
<label>(10)</label>
<mml:math id="mml-eqn-10" display="block"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mtable columnalign="left left left left left" rowspacing="1em 1em 0.4em" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:mo>&#x22EF;</mml:mo></mml:mtd><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr><mml:mtr><mml:mtd /><mml:mtd /><mml:mtd><mml:mo>&#x22EE;</mml:mo></mml:mtd><mml:mtd /><mml:mtd /></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:mo>&#x22EF;</mml:mo></mml:mtd><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable><mml:mo>]</mml:mo></mml:mrow></mml:math>
</disp-formula></p>
<p><disp-formula id="eqn-11">
<mml:math id="mml-eqn-11" display="block"><mml:mi>l</mml:mi><mml:mi>b</mml:mi><mml:mo>&#x2264;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mi>u</mml:mi><mml:mi>b</mml:mi></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-82"><mml:math id="mml-ieqn-82"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the population of BW spiders, <inline-formula id="ieqn-83"><mml:math id="mml-ieqn-83"><mml:mi>d</mml:mi></mml:math></inline-formula> indicates the number of decision variables, <inline-formula id="ieqn-84"><mml:math id="mml-ieqn-84"><mml:mi>N</mml:mi></mml:math></inline-formula> represents the population size, and <inline-formula id="ieqn-85"><mml:math id="mml-ieqn-85"><mml:mi>l</mml:mi><mml:mi>b</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-86"><mml:math id="mml-ieqn-86"><mml:mi>u</mml:mi><mml:mi>b</mml:mi></mml:math></inline-formula> indicate the lower and upper bounds of the decision variables. The candidate solution population <inline-formula id="ieqn-87"><mml:math id="mml-ieqn-87"><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is used for minimizing or maximizing the objective function as defined below.</p>
<p><disp-formula id="eqn-12">
<label>(11)</label>
<mml:math id="mml-eqn-12" display="block"><mml:mrow><mml:mi mathvariant="italic">O</mml:mi><mml:mi mathvariant="italic">b</mml:mi><mml:mi mathvariant="italic">j</mml:mi><mml:mi mathvariant="italic">e</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">t</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">v</mml:mi><mml:mi mathvariant="italic">e</mml:mi></mml:mrow><mml:mtext>&#xA0;</mml:mtext><mml:mrow><mml:mi mathvariant="italic">f</mml:mi><mml:mi mathvariant="italic">u</mml:mi><mml:mi mathvariant="italic">n</mml:mi><mml:mi mathvariant="italic">c</mml:mi><mml:mi mathvariant="italic">t</mml:mi><mml:mi mathvariant="italic">i</mml:mi><mml:mi mathvariant="italic">o</mml:mi><mml:mi mathvariant="italic">n</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math>
</disp-formula></p>
<p>The subsequent process in BWO algorithm is the reproduction of young spider from mating process. During or after the mating process is over, the female BW eats the male BW. An arbitrary election procedure is employed to choose a pair of spiders to perform mating procedure so as to lay eggs that get hatched into young spiders. Then, the reproduction task of BWO algorithm is defined below:</p>
<p><disp-formula id="eqn-13">
<label>(12)</label>
<mml:math id="mml-eqn-13" display="block"><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03B2;</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03B2;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math>
</disp-formula></p>
<p><disp-formula id="eqn-14">
<mml:math id="mml-eqn-14" display="block"><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03B2;</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03B2;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-88"><mml:math id="mml-ieqn-88"><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and <inline-formula id="ieqn-89"><mml:math id="mml-ieqn-89"><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are the young spiders in reproduction, <inline-formula id="ieqn-90"><mml:math id="mml-ieqn-90"><mml:mi>i</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-91"><mml:math id="mml-ieqn-91"><mml:mi>j</mml:mi></mml:math></inline-formula> denote the arbitrary numbers in the range of 1 to <inline-formula id="ieqn-92"><mml:math id="mml-ieqn-92"><mml:mi>N</mml:mi></mml:math></inline-formula> and &#x03B2; is the arbitrary number in the range of <inline-formula id="ieqn-93"><mml:math id="mml-ieqn-93"><mml:mn>0</mml:mn></mml:math></inline-formula> to 1. To avoid the arbitrary duplicative election of pairs, the reproduction procedure takes place for <inline-formula id="ieqn-94"><mml:math id="mml-ieqn-94"><mml:mi>d</mml:mi><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:math></inline-formula> times.</p>
<p>Next to reproduction, the population of mother and young spiders are arranged based on fitness value and the rate of cannibalism. At the time of optimization model, three cannibalism processes are considered. Sexual cannibalism is the primary one in which the female BW eats the male BW during or after the mating process. It is employed as fitness value of the female as well as male spider population. In sibling cannibalism, a strong young spider eats the weaker ones and is employed using the cannibalism rate. Finally, the mother BW gets eaten by their young ones. This mechanism makes use of fitness value of mothers as well as young spiders. Mutation is a subsequent procedure in BWO algorithm. A young spider is selected based on mutation rate and minor arbitrary value with the chosen young spiders for mutation; and this procedure is defined below.</p>
<p><disp-formula id="eqn-15">
<label>(13)</label>
<mml:math id="mml-eqn-15" display="block"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x03B1;</mml:mi></mml:math>
</disp-formula></p>
<p>where <inline-formula id="ieqn-95"><mml:math id="mml-ieqn-95"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> refers to the mutated spider population, <inline-formula id="ieqn-96"><mml:math id="mml-ieqn-96"><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the arbitrarily-elected young spider, <inline-formula id="ieqn-97"><mml:math id="mml-ieqn-97"><mml:mi>k</mml:mi></mml:math></inline-formula> implies the arbitrary number, and <inline-formula id="ieqn-98"><mml:math id="mml-ieqn-98"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula> represents the arbitrary mutation value. <?A3B2 "fig4",5,"anchor"?><xref ref-type="fig" rid="fig-4">Fig. 4</xref> illustrates the flowchart of BWO technique. BWO algorithm relies on three distinct variables such as reproduction rate (RP), cannibalism rate (CP), and mutation rate (MR). Here, RP is used to control the production of young spiders and offers chances to explore the search space so as to determine the optimal solution. CP is applied in controlling the weaker fitness population and the fittest one is enabled to go to the subsequent round. Finally, MR is employed in the management of diversity in present to subsequent rounds.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>The flowchart of BWO algorithm</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="CMC_20066-fig-4.png"/>
</fig>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental Validation</title>
<p>The presented DRL-BWO model was experimentally validated using NSL-KDD dataset [<xref ref-type="bibr" rid="ref-19">19</xref>]. It includes a total of 45927 instances under DoS attack, 995 instances under R2l attack, 11656 instances under Probe attack, 52 instances under U2r attack, and 67343 instances under Normal class as shown in <?A3B2 "tbl1",5,"anchor"?><xref ref-type="table" rid="table-1">Tab. 1</xref>. The parameter settings are as follows: batch size: 128, learning rate: 0.001, epoch count: 500, and momentum: 0.2. Besides, the study made use of 10-fold cross validation to split the dataset into training and testing datasets.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Types of attacks in NSL-KDD dataset</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Attack type</th>
<th>Description</th>
<th>No. of samples</th>
</tr>
</thead>
<tbody>
<tr>
<td>DoS</td>
<td>Denial of service attack</td>
<td>45,927</td>
</tr>
<tr>
<td>R2l</td>
<td>Unauthorized access from a remote host</td>
<td>995</td>
</tr>
<tr>
<td>Probe</td>
<td>Port monitoring or scanning</td>
<td>11,656</td>
</tr>
<tr>
<td>U2r</td>
<td>Unauthorized local super user privileged access</td>
<td>52</td>
</tr>
<tr>
<td>Normal</td>
<td>Not an attack</td>
<td>67,343</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><?A3B2 "tbl2",5,"anchor"?><xref ref-type="table" rid="table-2">Tab. 2</xref> and <?A3B2 "fig5",5,"anchor"?><xref ref-type="fig" rid="fig-5">Fig. 5</xref> show the results of detection analysis of DRL-BWO model in terms of distinct measures. The experimental values showcase that the DRL-BWO method has effectively detected different types of attacks in UAV networks. For instance, the samples under &#x2018;DoS&#x2019; attack type were detected at a precision of 0.975, recall of 0.990, F-measure of 0.981, and accuracy of 0.986. Likewise, the instances under &#x2018;R2l&#x2019; attack type got detected at a precision of 0.986, recall of 0.997, F-measure of 0.993, and accuracy of 0.991. Concurrently, the examples under &#x2018;Probe&#x2019; attack type were detected at a precision of 0.988, recall of 0.998, F-measure of 0.996, and accuracy of 0.993. Simultaneously, the samples under &#x2018;U2r&#x2019; attack type got detected at a precision of 0.989, recall of 0.991, F-measure of 0.985, and accuracy of 0.985. In line with these, the instances under &#x2018;Normal&#x2019; class were detected at a precision of 0.987, recall of 0.988, F-measure of 0.986, and accuracy of 0.988.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>Result for the analysis of the proposed DRL-BWO method</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Attack type</th>
<th>Precision</th>
<th>Recall</th>
<th>F-measure</th>
<th>Accuracy</th>
</tr>
</thead>
<tbody>
<tr>
<td>DoS</td>
<td>0.975</td>
<td>0.990</td>
<td>0.981</td>
<td>0.986</td>
</tr>
<tr>
<td>R2l</td>
<td>0.986</td>
<td>0.997</td>
<td>0.993</td>
<td>0.991</td>
</tr>
<tr>
<td>Probe</td>
<td>0.988</td>
<td>0.998</td>
<td>0.996</td>
<td>0.993</td>
</tr>
<tr>
<td>U2r</td>
<td>0.989</td>
<td>0.991</td>
<td>0.985</td>
<td>0.985</td>
</tr>
<tr>
<td>Normal</td>
<td>0.987</td>
<td>0.988</td>
<td>0.986</td>
<td>0.988</td>
</tr>
<tr>
<td>Average</td>
<td>0.985</td>
<td>0.993</td>
<td>0.988</td>
<td>0.989</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><?A3B2 "tbl3",5,"anchor"?><xref ref-type="table" rid="table-3">Tab. 3</xref> compares the results of the detection analysis attained by DRL-BWO model against existing methods in terms of distinct measures [<xref ref-type="bibr" rid="ref-20">20</xref>&#x2013;<xref ref-type="bibr" rid="ref-25">25</xref>]. <?A3B2 "fig6",5,"anchor"?><xref ref-type="fig" rid="fig-6">Fig. 6</xref> shows the results of precision and recall analysis of DRL-BWO against existing techniques. The figure portrays that the IDBN model exhibited the weakest detection performance, since it achieved a minimal precision of 0.904 and recall of 0.92. Besides, the AK-NN model showcased a slightly higher detection outcome and it achieved a precision of 0.922 and recall of 0.938. Following this, the DL model delivered a further improved performance, with a precision of 0.935 and recall of 0.949. Moreover, the DPC-DBN model depicted a moderate outcome with a precision of 0.951 and recall of 0.95. Furthermore, the DT model attempted to exhibit reasonable results with a precision of 0.966 and recall of 0.928. In line with these, the Adaboost method showcased a somewhat acceptable outcome with a precision of 0.974 and recall of 0.932. Simultaneously, the T-SID model obtained a closer precision of 0.975 and recall of 0.952.</p>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>Intrusion detection results analysis of the DRL-BWO model</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="CMC_20066-fig-5.png"/>
</fig>
<table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>Comparison of results from intrusion detection analysis of DRL-BWO method against existing methods</title>
</caption>
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Methods</th>
<th>Precision</th>
<th>Recall</th>
<th>F-measure</th>
<th>Accuracy</th>
</tr>
</thead>
<tbody>
<tr>
<td>DRL-BWO</td>
<td>0.985</td>
<td>0.993</td>
<td>0.988</td>
<td>0.989</td>
</tr>
<tr>
<td>IDBN</td>
<td>0.904</td>
<td>0.920</td>
<td>0.908</td>
<td>0.962</td>
</tr>
<tr>
<td>T-SID</td>
<td>0.975</td>
<td>0.952</td>
<td>0.973</td>
<td>0.940</td>
</tr>
<tr>
<td>Deep learning</td>
<td>0.935</td>
<td>0.949</td>
<td>0.941</td>
<td>0.928</td>
</tr>
<tr>
<td>DPC-DBN</td>
<td>0.951</td>
<td>0.950</td>
<td>0.951</td>
<td>0.950</td>
</tr>
<tr>
<td>AK-NN</td>
<td>0.922</td>
<td>0.938</td>
<td>0.929</td>
<td>0.920</td>
</tr>
<tr>
<td>Decision tree</td>
<td>0.966</td>
<td>0.928</td>
<td>0.954</td>
<td>0.937</td>
</tr>
<tr>
<td>Adaboost</td>
<td>0.974</td>
<td>0.932</td>
<td>0.957</td>
<td>0.959</td>
</tr>
<tr>
<td>Random forest</td>
<td>0.981</td>
<td>0.938</td>
<td>0.959</td>
<td>0.960</td>
</tr>
<tr>
<td>SVM</td>
<td>0.980</td>
<td>0.944</td>
<td>0.966</td>
<td>0.963</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Concurrently, the SVM model demonstrated a certainly satisfactory outcome with a precision of 0.98 and recall of 0.944. Although the RF model attained a near optimum precision of 0.981 and recall of 0.938, the presented DRL-BWO model outperformed all the existing methods by accomplishing a maximum precision of 0.985 and recall of 0.993.</p>
<p><?A3B2 "fig7",5,"anchor"?><xref ref-type="fig" rid="fig-7">Fig. 7</xref> examines the results of F-measure and accuracy analysis of DRL-BWO against existing methods. The figure portrays that the detection performance of IDBN technique got reduced as it offered a low F-measure of 0.908 and accuracy of 0.962. In line with this, the AK-NN approach demonstrated somewhat higher detection result as it yielded an F-measure of 0.929 and accuracy of 0.92. Along with that, the DL algorithm accomplished superior performance by obtaining an F-measure of 0.941 and accuracy of 0.928. In addition, the DPC-DBN model exhibited moderate results with an F-measure of 0.951 and accuracy of 0.95. Additionally, the DT model attempted to demonstrate reasonable outcomes with an F-measure of 0.954 and accuracy of 0.937.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>Comparative analysis of DRL-BWO model in terms of precision and recall</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="CMC_20066-fig-6.png"/>
</fig>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>Comparative analysis of DRL-BWO model in terms of F-measure and accuracy</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="CMC_20066-fig-7.png"/>
</fig>
<p>Similarly, the Adaboost method showed a somewhat acceptable outcome with an F-measure of 0.957 and accuracy of 0.959. At the same time, the RF technique attained a closer F-measure of 0.959 and an accuracy of 0.96. Further, the SVM technique portrayed a certainly satisfactory outcome with an F-measure of 0.966 and accuracy of 0.963. However, the T-SID method achieved a near optimum F-measure of 0.973 and accuracy of 0.940. In this scenario, the proposed DRL-BWO methodology outperformed the existing techniques and accomplished a superior F-measure of 0.988 and accuracy of 0.989. From the above discussed results of the analysis, it is apparent that the DRL-BWO algorithm is an efficient tool for UAV networks as it achieved improved outcomes. The DRL-BWO algorithm produced higher precision, recall, F-measure, and accuracy values of 0.985, 0.993, 0.988, and 0.989 respectively.</p>
</sec>
<sec id="s5">
<label>5</label>
<title>Conclusion</title>
<p>The current research article developed a DRL-BWO algorithm for intrusion detection in UAV networks. Primarily, the networking data, fed as input, undergoes preprocessing to remove the unwanted data and transform it into a compatible format. Besides, the DRL involves improved reinforcement learning-based DBN for intrusion detection. Then, the DBN model is applied in the determination of existence of intrusions in UAV networks. At last, BWO algorithm is employed to determine the optimal hyperparameter values involved in the presented model. A comprehensive set of experimental analyses was conducted to highlight the supremacy of the proposed model. From the simulation values, it is evident that the proposed method is an appropriate method as it obtained high precision, recall, F-measure, and accuracy values of 0.985, 0.993, 0.988, and 0.989 respectively. The model is found to be fit for information extraction tasks in high dimensional space. In addition, the application of BWO algorithm helps in fine tuning the classification performance of DBN model. In the future, intrusion detection performance can be further improved using feature selection algorithms.</p>
</sec>
</body>
<back>
<fn-group>
<fn fn-type="other">
<p><bold>Funding Statement:</bold> This work was supported by the Faculty of Computer Science and Information Technology, University of Malaya under Postgraduate Research Grant (PG035-2016A).</p>
</fn>
<fn fn-type="conflict">
<p><bold>Conflicts of Interest:</bold> The authors declare that they have no conflicts of interest to report regarding the present study.</p>
</fn>
</fn-group>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="other"><person-group person-group-type="author"><string-name><given-names>F.</given-names> <surname>Al-Turjman</surname></string-name>, <string-name><given-names>H.</given-names> <surname>Zahmatkesh</surname></string-name> and <string-name><given-names>R.</given-names> <surname>Shahroze</surname></string-name></person-group>, &#x201C;<article-title>An overview of security and privacy in smart cities&#x2019; IoT communications</article-title>,&#x201D; <source>Transactions on Emerging Telecommunications Technologies</source>, pp. <fpage>1</fpage>&#x2013;<lpage>20</lpage>, <year>2019</year>. [Online]. Available: <uri>https://doi.org/10.1002/ett.3677</uri>.</mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Zhang</surname></string-name>, <string-name><given-names>P.</given-names> <surname>Li</surname></string-name> and <string-name><given-names>X.</given-names> <surname>Wang</surname></string-name></person-group>, &#x201C;<article-title>Intrusion detection for IoT based on improved genetic algorithm and deep belief network</article-title>,&#x201D; <source>IEEE Access</source>, vol. <volume>7</volume>, pp. <fpage>31711</fpage>&#x2013;<lpage>31722</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>X.</given-names> <surname>Sun</surname></string-name>, <string-name><given-names>D. W. K.</given-names> <surname>Ng</surname></string-name>, <string-name><given-names>Z.</given-names> <surname>Ding</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Xu</surname></string-name> and <string-name><given-names>Z.</given-names> <surname>Zhong</surname></string-name></person-group>, &#x201C;<article-title>Physical layer security in UAV systems: Challenges and opportunities</article-title>,&#x201D; <source>IEEE Wireless Communications</source>, vol. <volume>26</volume>, no. <issue>5</issue>, pp. <fpage>40</fpage>&#x2013;<lpage>47</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>K.</given-names> <surname>Lei</surname></string-name>, <string-name><given-names>Q.</given-names> <surname>Zhang</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Lou</surname></string-name>, <string-name><given-names>B.</given-names> <surname>Bai</surname></string-name> and <string-name><given-names>K.</given-names> <surname>Xu</surname></string-name></person-group>, &#x201C;<article-title>Securing ICN-based UAV ad hoc networks with blockchain</article-title>,&#x201D; <source>IEEE Communications Magazine</source>, vol. <volume>57</volume>, no. <issue>6</issue>, pp. <fpage>26</fpage>&#x2013;<lpage>32</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>S. A. R.</given-names> <surname>Shah</surname></string-name> and <string-name><given-names>B.</given-names> <surname>Issac</surname></string-name></person-group>, &#x201C;<article-title>Performance comparison of intrusion detection systems and application of machine learning to Snort system</article-title>,&#x201D; <source>Future Generation Computer Systems</source>, vol. <volume>80</volume>, no. <issue>3</issue>, pp. <fpage>157</fpage>&#x2013;<lpage>170</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>E.</given-names> <surname>Kabir</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Hu</surname></string-name>, <string-name><given-names>H.</given-names> <surname>Wang</surname></string-name> and <string-name><given-names>G.</given-names> <surname>Zhuo</surname></string-name></person-group>, &#x201C;<article-title>A novel statistical technique for intrusion detection systems</article-title>,&#x201D; <source>Future Generation Computer Systems</source>, vol. <volume>79</volume>, no. <issue>3</issue>, pp. <fpage>303</fpage>&#x2013;<lpage>318</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>H.</given-names> <surname>Wang</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Gu</surname></string-name> and <string-name><given-names>S.</given-names> <surname>Wang</surname></string-name></person-group>, &#x201C;<article-title>An effective intrusion detection framework based on SVM with feature augmentation</article-title>,&#x201D; <source>Knowledge-Based Systems</source>, vol. <volume>136</volume>, no. <issue>1</issue>, pp. <fpage>130</fpage>&#x2013;<lpage>139</lpage>, <year>2017</year>.</mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>H. I.</given-names> <surname>Ahmed</surname></string-name>, <string-name><given-names>N. A.</given-names> <surname>Elfeshawy</surname></string-name>, <string-name><given-names>S. F.</given-names> <surname>Elzoghdy</surname></string-name>, <string-name><given-names>H. S.</given-names> <surname>El-sayed</surname></string-name> and <string-name><given-names>O. S.</given-names> <surname>Faragallah</surname></string-name></person-group>, &#x201C;<article-title>A neural network-based learning algorithm for intrusion detection systems</article-title>,&#x201D; <source>Wireless Personal Communications</source>, vol. <volume>97</volume>, no. <issue>2</issue>, pp. <fpage>3097</fpage>&#x2013;<lpage>3112</lpage>, <year>2017</year>.</mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>W.</given-names> <surname>Hu</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Gao</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Wang</surname></string-name>, <string-name><given-names>O.</given-names> <surname>Wu</surname></string-name> and <string-name><given-names>S.</given-names> <surname>Maybank</surname></string-name></person-group>, &#x201C;<article-title>Online adaboost-based parameterized methods for dynamic distributed network intrusion detection</article-title>,&#x201D; <source>IEEE Transactions on Cybernetics</source>, vol. <volume>44</volume>, no. <issue>1</issue>, pp. <fpage>66</fpage>&#x2013;<lpage>82</lpage>, <year>2014</year>.</mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>T.</given-names> <surname>Ma</surname></string-name>, <string-name><given-names>F.</given-names> <surname>Wang</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Cheng</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Yu</surname></string-name> and <string-name><given-names>X.</given-names> <surname>Chen</surname></string-name></person-group>, &#x201C;<article-title>A hybrid spectral clustering and deep neural network ensemble algorithm for intrusion detection in sensor networks</article-title>,&#x201D; <source>Sensors</source>, vol. <volume>16</volume>, no. <issue>10</issue>, pp. <fpage>1701</fpage>, <year>2016</year>.</mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>G. E.</given-names> <surname>Hinton</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Osindero</surname></string-name> and <string-name><given-names>Y.-W.</given-names> <surname>Teh</surname></string-name></person-group>, &#x201C;<article-title>A fast learning algorithm for deep belief nets</article-title>,&#x201D; <source>Neural Computation</source>, vol. <volume>18</volume>, no. <issue>7</issue>, pp. <fpage>1527</fpage>&#x2013;<lpage>1554</lpage>, <year>2006</year>.</mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><given-names>F.</given-names> <surname>Qu</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Zhang</surname></string-name>, <string-name><given-names>Z.</given-names> <surname>Shao</surname></string-name> and <string-name><given-names>S.</given-names> <surname>Qi</surname></string-name></person-group>, &#x201C;<article-title>An intrusion detection model based on deep belief network</article-title>,&#x201D; in <conf-name>Proc. of the 2017 VI Int. Conf. on Network, Communication and Computing</conf-name>, <publisher-loc>Kunming, China</publisher-loc>, pp. <fpage>97</fpage>&#x2013;<lpage>101</lpage>, <year>2017</year>. </mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><given-names>D.</given-names> <surname>Liang</surname></string-name> and <string-name><given-names>P.</given-names> <surname>Pan</surname></string-name></person-group>, &#x201C;<article-title>Research on intrusion detection system based on DBN-EL</article-title>,&#x201D; in <conf-name>2019 Int. Conf. on Communications, Information System and Computer Engineering</conf-name>, <publisher-loc>Haikou, China</publisher-loc>, <publisher-name>IEEE</publisher-name>, pp. <fpage>495</fpage>&#x2013;<lpage>499</lpage>, <year>2019</year>. </mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Israr</surname></string-name>, <string-name><given-names>G. E. M.</given-names> <surname>Abro</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Sadiq Ali Khan</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Farhan</surname></string-name> and <string-name><given-names>S. U. A.</given-names> <surname>Bin Mohd Zulkifli</surname></string-name></person-group>, &#x201C;<article-title>Internet of things (IoT)-enabled unmanned aerial vehicles for the inspection of construction sites: A vision and future directions</article-title>,&#x201D; <source>Mathematical Problems in Engineering</source>, vol. <volume>2021</volume>, pp. <fpage>1</fpage>&#x2013;<lpage>15</lpage>, <year>2021</year>.</mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Abdollahi</surname></string-name> and <string-name><given-names>M.</given-names> <surname>Fathi</surname></string-name></person-group>, &#x201C;<article-title>An intrusion detection system on ping of death attacks in IoT networks</article-title>,&#x201D; <source>Wireless Personal Communications</source>, vol. <volume>112</volume>, no. <issue>4</issue>, pp. <fpage>2057</fpage>&#x2013;<lpage>2070</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Roder</surname></string-name>, <string-name><given-names>L. A.</given-names> <surname>Passos</surname></string-name>, <string-name><given-names>L. C. F.</given-names> <surname>Ribeiro</surname></string-name>, <string-name><given-names>C.</given-names> <surname>Pereira</surname></string-name> and <string-name><given-names>J. P.</given-names> <surname>Papa</surname></string-name></person-group>, &#x201C;<article-title>A layer-wise information reinforcement approach to improve learning in deep belief networks</article-title>,&#x201D; in <conf-name>Artificial Intelligence and Soft Computing, Proc.: Lecture Notes in Computer Science Book Series</conf-name>, <publisher-loc>New York City, NY, USA</publisher-loc>, vol. <volume>12415</volume>, pp. <fpage>231</fpage>&#x2013;<lpage>241</lpage>, <year>2020</year>. </mixed-citation></ref>
<ref id="ref-17"><label>[17]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>K.</given-names> <surname>Premkumar</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Vishnupriya</surname></string-name>, <string-name><given-names>T. S.</given-names> <surname>Babu</surname></string-name>, <string-name><given-names>B. V.</given-names> <surname>Manikandan</surname></string-name> and <string-name><given-names>T.</given-names> <surname>Thamizhselvan</surname></string-name></person-group>, &#x201C;<article-title>Black widow optimization-based optimal pi-controlled wind turbine emulator</article-title>,&#x201D; <source>Sustainability</source>, vol. <volume>12</volume>, no. <issue>24</issue>, pp. <fpage>10357</fpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-18"><label>[18]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>J.</given-names> <surname>Li</surname></string-name>, <string-name><given-names>Z.</given-names> <surname>Zhao</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Li</surname></string-name> and <string-name><given-names>H.</given-names> <surname>Zhang</surname></string-name></person-group>, &#x201C;<article-title>AI-based two-stage intrusion detection for software defined IoT networks</article-title>,&#x201D; <source>IEEE Internet of Things Journal</source>, vol. <volume>6</volume>, no. <issue>2</issue>, pp. <fpage>2093</fpage>&#x2013;<lpage>2102</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-19"><label>[19]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A. A.</given-names> <surname>Diro</surname></string-name> and <string-name><given-names>N.</given-names> <surname>Chilamkurti</surname></string-name></person-group>, &#x201C;<article-title>Distributed attack detection scheme using deep learning approach for internet of things</article-title>,&#x201D; <source>Future Generation Computer Systems</source>, vol. <volume>82</volume>, no. <issue>6</issue>, pp. <fpage>761</fpage>&#x2013;<lpage>768</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-20"><label>[20]</label><mixed-citation publication-type="other">[Online]. Available: <uri>https://www.unb.ca/cic/datasets/nsl.html</uri>.</mixed-citation></ref>
<ref id="ref-21"><label>[21]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Yang</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Zheng</surname></string-name>, <string-name><given-names>C.</given-names> <surname>Wu</surname></string-name>, <string-name><given-names>X.</given-names> <surname>Niu</surname></string-name> and <string-name><given-names>Y.</given-names> <surname>Yang</surname></string-name></person-group>, &#x201C;<article-title>Building an effective intrusion detection system using the modified density peak clustering algorithm and deep belief networks</article-title>,&#x201D; <source>Applied Sciences</source>, vol. <volume>9</volume>, no. <issue>2</issue>, pp. <fpage>238</fpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-22"><label>[22]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Djenouri</surname></string-name>, <string-name><given-names>A.</given-names> <surname>Belhadi</surname></string-name>, <string-name><given-names>J. C.-W.</given-names> <surname>Lin</surname></string-name> and <string-name><given-names>A.</given-names> <surname>Cano</surname></string-name></person-group>, &#x201C;<article-title>Adapted k-nearest neighbors for detecting anomalies on spatio-temporal traffic flow</article-title>,&#x201D; <source>IEEE Access</source>, vol. <volume>7</volume>, pp. <fpage>10015</fpage>&#x2013;<lpage>10027</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-23"><label>[23]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Aroosa</surname></string-name>, <string-name><given-names>S. S.</given-names> <surname>Ullah</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Hussain</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Alroobaea</surname></string-name> and <string-name><given-names>I.</given-names> <surname>Ali</surname></string-name></person-group>, &#x201C;<article-title>Securing NDN-based internet of health things through cost-effective signcryption scheme</article-title>,&#x201D; <source>Wireless Communications and Mobile Computing</source>, vol. <volume>2021</volume>, pp. <fpage>1</fpage>&#x2013;<lpage>13</lpage>, <year>2021</year>.</mixed-citation></ref>
<ref id="ref-24"><label>[24]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Abbas</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Krichen</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Alroobaea</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Malebary</surname></string-name>, <string-name><given-names>U.</given-names> <surname>Tariq</surname></string-name> <etal>et al.</etal></person-group>, &#x201C;<article-title>An opportunistic data dissemination for autonomous vehicles communication</article-title>,&#x201D; <source>Soft Computing</source>, <month>Feb</month>, vol. <volume>76</volume>, no. <issue>4</issue>, pp. <fpage>2665</fpage>, <year>2021</year>.</mixed-citation></ref>
<ref id="ref-25"><label>[25]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>W.</given-names> <surname>Alhakami</surname></string-name>, <string-name><given-names>A.</given-names> <surname>ALharbi</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Bourouis</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Alroobaea</surname></string-name> and <string-name><given-names>N.</given-names> <surname>Bouguila</surname></string-name></person-group>, &#x201C;<article-title>Network anomaly intrusion detection using a nonparametric bayesian approach and feature selection</article-title>,&#x201D; <source>IEEE Access</source>, vol. <volume>7</volume>, pp. <fpage>52181</fpage>&#x2013;<lpage>52190</lpage>, <year>2019</year>.</mixed-citation></ref>
</ref-list>
</back>
</article>
