<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xml:lang="en" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">EE</journal-id>
<journal-id journal-id-type="nlm-ta">EE</journal-id>
<journal-id journal-id-type="publisher-id">EE</journal-id>
<journal-title-group>
<journal-title>Energy Engineering</journal-title>
</journal-title-group>
<issn pub-type="epub">1546-0118</issn>
<issn pub-type="ppub">0199-8595</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">40887</article-id>
<article-id pub-id-type="doi">10.32604/ee.2023.040887</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>The Short-Term Prediction of Wind Power Based on the Convolutional Graph Attention Deep Neural Network</article-title>
<alt-title alt-title-type="left-running-head">The Short-term Prediction of Wind Power Based on the Convolutional Graph Attention Deep Neural Network</alt-title>
<alt-title alt-title-type="right-running-head">The Short-term Prediction of Wind Power Based on the Convolutional Graph Attention Deep Neural Network</alt-title>
</title-group>
<contrib-group>
<contrib id="author-1" contrib-type="author">
<name name-style="western"><surname>Xiao</surname><given-names>Fan</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western"><surname>Ping</surname><given-names>Xiong</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-3" contrib-type="author" corresp="yes">
<name name-style="western"><surname>Li</surname><given-names>Yeyang</given-names></name><xref ref-type="aff" rid="aff-2">2</xref><email>liyeyang2000@163.com</email></contrib>
<contrib id="author-4" contrib-type="author">
<name name-style="western"><surname>Xu</surname><given-names>Yusen</given-names></name><xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western"><surname>Kang</surname><given-names>Yiqun</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-6" contrib-type="author">
<name name-style="western"><surname>Liu</surname><given-names>Dan</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<contrib id="author-7" contrib-type="author">
<name name-style="western"><surname>Zhang</surname><given-names>Nianming</given-names></name><xref ref-type="aff" rid="aff-1">1</xref></contrib>
<aff id="aff-1"><label>1</label><institution>State Grid Hubei Electric Power Research Institute</institution>, <addr-line>Wuhan, 430077</addr-line>, <country>China</country></aff>
<aff id="aff-2"><label>2</label><institution>College of Energy and Electrical Engineering, Hohai University</institution>, <addr-line>Nanjing, 210098</addr-line>, <country>China</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label>Corresponding Author: Yeyang Li. Email: <email>liyeyang2000@163.com</email></corresp>
</author-notes>
<pub-date date-type="collection" publication-format="electronic">
<year>2024</year></pub-date>
<pub-date date-type="pub" publication-format="electronic">
<day>25</day>
<month>1</month>
<year>2024</year>
</pub-date>
<volume>121</volume>
<issue>2</issue>
<fpage>359</fpage>
<lpage>376</lpage>
<history>
<date date-type="received">
<day>03</day>
<month>4</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>8</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2024 Xiao et al.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Xiao et al.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_EE_40887.pdf"></self-uri>
<abstract>
<p>The fluctuation of wind power affects the operating safety and power consumption of the electric power grid and restricts the grid connection of wind power on a large scale. Therefore, wind power forecasting plays a key role in improving the safety and economic benefits of the power grid. This paper proposes a wind power predicting method based on a convolutional graph attention deep neural network with multi-wind farm data. Based on the graph attention network and attention mechanism, the method extracts spatial-temporal characteristics from the data of multiple wind farms. Then, combined with a deep neural network, a convolutional graph attention deep neural network model is constructed. Finally, the model is trained with the quantile regression loss function to achieve the wind power deterministic and probabilistic prediction based on multi-wind farm spatial-temporal data. A wind power dataset in the U.S. is taken as an example to demonstrate the efficacy of the proposed model. Compared with the selected baseline methods, the proposed model achieves the best prediction performance. The point prediction errors (i.e., root mean square error (RMSE) and normalized mean absolute percentage error (NMAPE)) are 0.304 MW and 1.177%, respectively. And the comprehensive performance of probabilistic prediction (i.e., continuously ranked probability score (CRPS)) is 0.580. Thus, the significance of multi-wind farm data and spatial-temporal feature extraction module is self-evident.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Wind power prediction</kwd>
<kwd>deep neural network</kwd>
<kwd>graph attention network</kwd>
<kwd>attention mechanism</kwd>
<kwd>quantile regression</kwd>
</kwd-group>
<funding-group>
<award-group id="awg1">
<funding-source>Science and Technology Project of State Grid Corporation of China</funding-source>
<award-id>4000-202122070A-0-0-00</award-id>
</award-group>
</funding-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>Wind energy is the world&#x2019;s third-largest renewable energy source with huge development potential. China has the world&#x2019;s largest installed capacity [<xref ref-type="bibr" rid="ref-1">1</xref>]. However, wind energy is affected by atmospheric movement and its changes have strong randomness and volatility. With the development of built-in monitoring and data acquisition technology in power systems, wind farms have stored a large amount of historical wind power data and meteorological data that can be used for wind power prediction [<xref ref-type="bibr" rid="ref-2">2</xref>].</p>
<sec id="s1_1">
<label>1.1</label>
<title>Related Literature Review</title>
<p>After the research and development of wind power prediction technology over many years, a large number of prediction models have been proposed. These models can be divided into three categories according to the prediction techniques, namely the statistical model, the machine learning model, and the deep learning model [<xref ref-type="bibr" rid="ref-3">3</xref>]. A typical statistical model is the time series model, such as the autoregressive [<xref ref-type="bibr" rid="ref-4">4</xref>] and autoregression moving average [<xref ref-type="bibr" rid="ref-5">5</xref>], etc. The machine learning model includes multilayer perceptron [<xref ref-type="bibr" rid="ref-6">6</xref>], support vector machine [<xref ref-type="bibr" rid="ref-7">7</xref>], random forest [<xref ref-type="bibr" rid="ref-8">8</xref>], etc. However, it is difficult for traditional machine learning prediction models based on shallow networks to handle complex and multi-source historical data. Deep learning models have strong feature extraction ability and generalization abilities [<xref ref-type="bibr" rid="ref-9">9</xref>]. In the field of wind power prediction, the relatively mature deep learning networks are the CNN [<xref ref-type="bibr" rid="ref-10">10</xref>,<xref ref-type="bibr" rid="ref-11">11</xref>] and recurrent neural networks represented by LSTM [<xref ref-type="bibr" rid="ref-12">12</xref>,<xref ref-type="bibr" rid="ref-13">13</xref>] and GRU [<xref ref-type="bibr" rid="ref-14">14</xref>,<xref ref-type="bibr" rid="ref-15">15</xref>]. CNN is widely used to extract non-linear features in complex sequences, and LSTM and GRU are suitable for times series data modeling [<xref ref-type="bibr" rid="ref-16">16</xref>].</p>
<p>Numerical weather prediction data include the wind speed, the wind direction, the humidity, and other prediction data related to wind power [<xref ref-type="bibr" rid="ref-17">17</xref>]. Selecting appropriate NWP data as the feature input, many deep learning models can show better performance in wind power prediction. However, the complex and diverse NWP data related to wind power can diminish the prediction models&#x2019; performance, so the dynamic features of NWP data need to be extracted by efficient methods [<xref ref-type="bibr" rid="ref-18">18</xref>]. Literature [<xref ref-type="bibr" rid="ref-19">19</xref>] proposed a wind power probabilistic prediction model based on CNN and verified its accuracy. In Literature [<xref ref-type="bibr" rid="ref-20">20</xref>], CNN and physical models are combined in prediction, which further improves the forecasting efficacy of short-term wind power. Considering the effect of meteorological elements on wind power forecasting, literature [<xref ref-type="bibr" rid="ref-21">21</xref>] screened the multivariate meteorological information data highly correlated with wind power with distance analysis and used it as the LSTM model&#x2019;s input data. The hybrid model composed of CNN and LSTM can take advantage of each model, further improving the forecasting performance [<xref ref-type="bibr" rid="ref-22">22</xref>]. Literature [<xref ref-type="bibr" rid="ref-22">22</xref>] combined CNN and LSTM for wind power prediction and took into consideration the meteorological elements&#x2019; influence on wind power in time and space.</p>
<p>Wind power is essentially determined by meteorological factors with spatial-temporal characteristics, including the wind speed and the wind direction. Conventional input features of wind power forecasting are all the relevant datasets of the local wind farm, which can only capture the features in time series and ignore the spatial-temporal features [<xref ref-type="bibr" rid="ref-23">23</xref>]. With the improvement of the electric system measurement system, the meteorological data of different wind farms can be managed centrally. Therefore, algorithms could be used to fuse and extract features of the data of adjacent wind farms, and to achieve more accurate wind power prediction [<xref ref-type="bibr" rid="ref-24">24</xref>]. However, deep neural networks such as CNN are mainly used to deal with well-structured data, whereas the wind farm location distribution, in reality, is irregular. Therefore, it is more reasonable to use the graph structure to characterize the relationship between each wind farm. Based on the graph structure, each wind farm is independent and connected by the line relationship. For such graph structure data, graph neural networks [<xref ref-type="bibr" rid="ref-25">25</xref>] (GNN) have relatively good spatial-temporal feature extraction effects. Literature [<xref ref-type="bibr" rid="ref-26">26</xref>] proposed a graph deep learning model, which is used to study neighboring wind farms&#x2019; spatial-temporal characteristics from the wind speed and wind direction data, and to forecast the wind speed of the entire graph node according to the extracted spatial-temporal characteristics. 
In literature [<xref ref-type="bibr" rid="ref-27">27</xref>], the spatial-temporal correlation graph neural network was proposed to forecast the multi-node offshore wind speed, to better capture the potential spatial dependency from node relationships and historical time series, and to distinguish node contributions and generate high-dimensional spatial characteristics.</p>
<p>At present, GNN-related research is developing rapidly, among which the graph convolutional neural network [<xref ref-type="bibr" rid="ref-28">28</xref>] (GCN) has been studied and applied in many fields. GCN depends on the initial adjacency matrix and the graph attention network [<xref ref-type="bibr" rid="ref-29">29</xref>] introduces the attention mechanism based on GCN. Compared with GCN, an adaptive edge weight coefficient is added to the graph attention layer of GAT. The weight coefficient matrix of GAT does not require complex data formulas, and it can be automatically learned from GAT. In addition, the attention mechanism reduces the number of parameters to be learned. Therefore, the graph attention model has very efficient graph data processing and expression performance compared with GCN.</p>
</sec>
<sec id="s1_2">
<label>1.2</label>
<title>Contributions of This Work</title>
<p>All the above models are point prediction models, which obtain the deterministic value of the future wind power but cannot accurately describe the uncertainty of the wind power. Due to the strong volatility of wind electricity power, point forecasting may not be reliable enough to satisfy realistic scheduling needs [<xref ref-type="bibr" rid="ref-30">30</xref>]. As a result, the probability and interval forecasting of wind power has become a hot research topic. In literature [<xref ref-type="bibr" rid="ref-31">31</xref>], the authors designed a multi-source and temporal attention model to dynamically select the variables of NWP and extract temporal dependency, and construct a multi-step probabilistic prediction using a mixture density module based on a beta kernel. However, this study only considers temporal dependency. Literature [<xref ref-type="bibr" rid="ref-32">32</xref>] developed a method that improves short-term wind power probabilistic prediction by the combination of deep belief network (DBN), error scenario partitioning method that is used to mine spatial-temporal dependence of NWP data, and kernel density estimation. However, the proposed method is affected by the power characteristic curve&#x2019;s accuracy. The interval prediction can provide a high-confidence prediction interval and the probability forecasting can fit the probability density function curve of the forecasting result [<xref ref-type="bibr" rid="ref-33">33</xref>]. The interval forecasting and probability forecasting are usually implemented by a combined model of quantile regression (QR) and point prediction model [<xref ref-type="bibr" rid="ref-34">34</xref>]. Literature [<xref ref-type="bibr" rid="ref-35">35</xref>] combined QR with LSTM to propose a quantile regression long short-term memory model (QRLSTM) which obtains relatively accurate results for the wind power point prediction and the probability density prediction.</p>
<p>Based on existing research, a wind power probability forecasting model based on spatial-temporal feature extraction is proposed in the paper to realize accurate point prediction and reliable interval forecasting and probability forecasting of wind power. Firstly, CNN, GAT and attention mechanism are combined to extract the complex dynamic spatial-temporal characteristics of each adjacent wind farm. Then, LSTM is used to build a QR prediction model to realize wind power forecasting. Finally, the wind power probability density function curve is obtained through KDE. The actual wind power data of a wind farm in the U.S. is tested and compared with other prediction models. As can be seen from the results, the forecasting efficacy of the proposed model has been improved. The main contributions of this work are threefold:</p>
<p>(1) Since the meteorological data comes from multiple wind farms, this paper combines CNN and GAT to extract characteristics of meteorological data. CNN is used to extract the complex dynamic temporal features of each adjacent wind farm, and the processed features are aggregated into the graph structure data, and then the GAT module is used to learn the spatial-temporal features. Moreover, the multi-head attention mechanism is introduced in GAT to further overcome the complex data noise of meteorological data in the graph. Compared with other methods, the proposed model not only considers the meteorological data characteristics of the target wind farm, but also fuses the historical meteorological characteristics of multiple nearby wind farms for spatial-temporal feature extraction, which fully considers the coupling relationship between wind power and multi-source meteorological factors, and improves the prediction accuracy of wind power.</p>
<p>(2) We combined LSTM and QR to construct a wind power prediction model, and realized point and probabilistic wind power forecasting based on spatial-temporal data of multi-wind fields.</p>
<p>(3) The actual wind farm data is used for an example test, and the comparison experiment with several models is carried out to verify the superiority of the proposed CGA-LSTM model. In addition, for the purpose of ensuring the reproducibility of the proposed prediction method, we published the relevant code on GitHub<xref ref-type="fn" rid="fn1"><sup>1</sup></xref><fn id="fn1"><label>1</label><p>GitHub project website: <ext-link ext-link-type="uri" xlink:href="https://github.com/liyeyang-isfj/CGA-LSTM">https://github.com/liyeyang-isfj/CGA-LSTM</ext-link>.</p></fn>.</p>
</sec>
</sec>
<sec id="s2">
<label>2</label>
<title>The Algorithm Model Principle</title>
<sec id="s2_1">
<label>2.1</label>
<title>The Graph Modeling</title>
<p>The research subject of GAT is graph data, which is modeled from graph theory. The graph here refers to the data structure similar to the topological graph which consists of nodes and edges. The formula of the graph is given by:</p>
<p><disp-formula id="eqn-1"><label>(1)</label><mml:math id="mml-eqn-1" display="block"><mml:mtable columnalign="left" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>G</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mi>V</mml:mi><mml:mo>,</mml:mo><mml:mi>E</mml:mi><mml:mo>,</mml:mo><mml:mi>A</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>V</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mi>v</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>v</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mtable columnalign="left left" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mtd><mml:mtd><mml:mi>o</mml:mi><mml:mi>b</mml:mi><mml:mi>j</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi></mml:mtd></mml:mtr></mml:mtable><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mo>&#x003C;</mml:mo><mml:mi>v</mml:mi><mml:mo>,</mml:mo><mml:mi>w</mml:mi><mml:mo>&#x003E;</mml:mo><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>v</mml:mi><mml:mo>,</mml:mo><mml:mi>w</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>V</mml:mi><mml:mo>&#x2227;</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>v</mml:mi><mml:mo>,</mml:mo><mml:mi>w</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-1"><mml:math id="mml-ieqn-1"><mml:mi>V</mml:mi></mml:math></inline-formula> represents the set of vertices, <inline-formula id="ieqn-2"><mml:math id="mml-ieqn-2"><mml:mi>E</mml:mi></mml:math></inline-formula> represents the set of edges, and <inline-formula id="ieqn-3"><mml:math id="mml-ieqn-3"><mml:mi>A</mml:mi></mml:math></inline-formula> represents the adjacency matrix of graphs. The elements in the adjacency matrix only represent the connection relation between wind farms. Therefore, the adjacent matrix of GAT is constructed as follows:</p>
<p><disp-formula id="eqn-2"><label>(2)</label><mml:math id="mml-eqn-2" display="block"><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mn>1</mml:mn></mml:mtd><mml:mtd><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2265;</mml:mo><mml:mi>&#x03B3;</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd><mml:mtd><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x003C;</mml:mo><mml:mi>&#x03B3;</mml:mi></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-4"><mml:math id="mml-ieqn-4"><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> indicates the Pearson correlation coefficient of the <inline-formula id="ieqn-5"><mml:math id="mml-ieqn-5"><mml:msup><mml:mi>i</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> and the <inline-formula id="ieqn-6"><mml:math id="mml-ieqn-6"><mml:msup><mml:mi>j</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> wind farm meteorological data. When <inline-formula id="ieqn-7"><mml:math id="mml-ieqn-7"><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is greater than or equal to <inline-formula id="ieqn-8"><mml:math id="mml-ieqn-8"><mml:mi>&#x03B3;</mml:mi></mml:math></inline-formula>, <inline-formula id="ieqn-9"><mml:math id="mml-ieqn-9"><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-10"><mml:math id="mml-ieqn-10"><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are linked to the edge <inline-formula id="ieqn-11"><mml:math id="mml-ieqn-11"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="bold-italic">E</mml:mi></mml:math></inline-formula> with a weight of 1. 
When <inline-formula id="ieqn-12"><mml:math id="mml-ieqn-12"><mml:mi>P</mml:mi><mml:mi>C</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is less than <inline-formula id="ieqn-13"><mml:math id="mml-ieqn-13"><mml:mi>&#x03B3;</mml:mi></mml:math></inline-formula>, <inline-formula id="ieqn-14"><mml:math id="mml-ieqn-14"><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-15"><mml:math id="mml-ieqn-15"><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are not linked with edges, indicating that only the influence between connected points is considered in the attention mechanism application.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>The Graph Attention Network GAT</title>
<p>The main principle of GAT is that in the model parameter training and feature extraction of graph data, the neighborhood weight of the target node and its adjacent nodes is determined by the attention mechanism. In this way, the spatial-temporal correlation between nodes can be determined by the edge weight without depending on the initial adjacent matrix.</p>
<p><xref ref-type="fig" rid="fig-1">Fig. 1</xref> shows the framework of GAT, <inline-formula id="ieqn-16"><mml:math id="mml-ieqn-16"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> denotes the meteorological data of the target wind farm station, <inline-formula id="ieqn-17"><mml:math id="mml-ieqn-17"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x223C;</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mn>6</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> denote the meteorological data of the nearby station, <inline-formula id="ieqn-18"><mml:math id="mml-ieqn-18"><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> constitutes the weight matrix <inline-formula id="ieqn-19"><mml:math id="mml-ieqn-19"><mml:mi>W</mml:mi></mml:math></inline-formula>, and <inline-formula id="ieqn-20"><mml:math id="mml-ieqn-20"><mml:msubsup><mml:mi>h</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msubsup></mml:math></inline-formula> represents the output of the target wind farm station. The GAT input can be expressed as:</p>
<p><disp-formula id="eqn-3"><label>(3)</label><mml:math id="mml-eqn-3" display="block"><mml:mi>h</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msub><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-21"><mml:math id="mml-ieqn-21"><mml:mi>N</mml:mi></mml:math></inline-formula> represents the quantity of nodes and <inline-formula id="ieqn-22"><mml:math id="mml-ieqn-22"><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover></mml:math></inline-formula> denotes the input of a single node.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>The structure of GAT</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-1.tif"/>
</fig>
<p>Each node in GAT corresponds to a hidden state. The hidden state is jointly determined by the data input of its node and the relevant influence of the neighbor node data. This process is mainly realized through the self-attention mechanism. Its attention coefficient is calculated as follows:</p>
<p><disp-formula id="eqn-4"><label>(4)</label><mml:math id="mml-eqn-4" display="block"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>W</mml:mi><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>W</mml:mi><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-23"><mml:math id="mml-ieqn-23"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the attention coefficient of the adjacent node <inline-formula id="ieqn-24"><mml:math id="mml-ieqn-24"><mml:mi>j</mml:mi></mml:math></inline-formula> to the target node <inline-formula id="ieqn-25"><mml:math id="mml-ieqn-25"><mml:mi>i</mml:mi></mml:math></inline-formula>. <inline-formula id="ieqn-26"><mml:math id="mml-ieqn-26"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-27"><mml:math id="mml-ieqn-27"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represent the hidden state of the adjacent nodes. <inline-formula id="ieqn-28"><mml:math id="mml-ieqn-28"><mml:mi>W</mml:mi></mml:math></inline-formula> is the weight matrix and <inline-formula id="ieqn-29"><mml:math id="mml-ieqn-29"><mml:mi>a</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the attention mechanism, indicating the correlation between feature vectors.</p>
<p>It can avoid the relatively large calculation amount by calculating only the attention correlation coefficient of the target node <inline-formula id="ieqn-30"><mml:math id="mml-ieqn-30"><mml:mi>i</mml:mi></mml:math></inline-formula> and its adjacent node <inline-formula id="ieqn-31"><mml:math id="mml-ieqn-31"><mml:mi>j</mml:mi></mml:math></inline-formula>. Its expression is as follows:</p>
<p><disp-formula id="eqn-5"><label>(5)</label><mml:math id="mml-eqn-5" display="block"><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>s</mml:mi><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mi>t</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:munder><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-32"><mml:math id="mml-ieqn-32"><mml:mi>s</mml:mi><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mi>t</mml:mi><mml:munder><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:munder><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the normalization function and <inline-formula id="ieqn-33"><mml:math id="mml-ieqn-33"><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> indicates the set of adjacent nodes of node <inline-formula id="ieqn-34"><mml:math id="mml-ieqn-34"><mml:mi>i</mml:mi></mml:math></inline-formula>. The nonlinear activation function <inline-formula id="ieqn-35"><mml:math id="mml-ieqn-35"><mml:mrow><mml:mtext>LeakyReLU</mml:mtext></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is used for activation with a slope of 0.2.</p>
<p><disp-formula id="eqn-6"><label>(6)</label><mml:math id="mml-eqn-6" display="block"><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtext>LeakyReLU</mml:mtext></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mover><mml:mi>a</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mi>W</mml:mi><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>W</mml:mi><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:munder><mml:mi>exp</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtext>LeakyReLU</mml:mtext></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mover><mml:mi>a</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mi>W</mml:mi><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>W</mml:mi><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-36"><mml:math id="mml-ieqn-36"><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow></mml:math></inline-formula> indicates the feature connection and <inline-formula id="ieqn-37"><mml:math id="mml-ieqn-37"><mml:mover><mml:mi>a</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mi>W</mml:mi><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>W</mml:mi><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> shows that nodes <inline-formula id="ieqn-38"><mml:math id="mml-ieqn-38"><mml:mi>i</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-39"><mml:math id="mml-ieqn-39"><mml:mi>j</mml:mi></mml:math></inline-formula> are feature-extracted separately and stitched.</p>
<p>Through the above computation, the output of each node is obtained as follows:</p>
<p><disp-formula id="eqn-7"><label>(7)</label><mml:math id="mml-eqn-7" display="block"><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:msubsup><mml:mi></mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msubsup></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:munder><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mi>W</mml:mi><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-40"><mml:math id="mml-ieqn-40"><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> represents the activation function of sigmoid and <inline-formula id="ieqn-41"><mml:math id="mml-ieqn-41"><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:msup><mml:mrow><mml:msub><mml:mi></mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:mrow></mml:msup></mml:math></inline-formula> denotes the output of node <inline-formula id="ieqn-42"><mml:math id="mml-ieqn-42"><mml:mi>i</mml:mi></mml:math></inline-formula>.</p>
<p>Through GAT, the target node highly aggregates the characteristics information of each adjacent node according to the weight information with each adjacent node and adaptively extracts the highly correlated node features of adjacent nodes. Therefore, GAT has efficient spatial-temporal feature extraction ability and GAT is flexible in modeling without relying on the graph structure and node order, which can enhance the model&#x2019;s prediction ability.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>The Multi-Head Attention Mechanism</title>
<p>To raise the reliability and stability of GAT spatial-temporal feature extraction, we bring the multi-head attention mechanism to GAT. The multi-head attention uses K independent attention mechanisms to improve <xref ref-type="disp-formula" rid="eqn-7">formula (7)</xref>, that is, the K-order parallel independent operation of GAT is conducted. Then, the results of each conversion are combined to obtain the final feature output result as follows:</p>
<p><disp-formula id="eqn-8"><label>(8)</label><mml:math id="mml-eqn-8" display="block"><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:msubsup><mml:mi></mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msubsup></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msubsup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msubsup><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:munder><mml:msubsup><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mi>W</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msup><mml:msub><mml:mover><mml:mi>h</mml:mi><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-43"><mml:math id="mml-ieqn-43"><mml:msubsup><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> represents the normalized result of the attention coefficient <inline-formula id="ieqn-44"><mml:math id="mml-ieqn-44"><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> of the <inline-formula id="ieqn-45"><mml:math id="mml-ieqn-45"><mml:msup><mml:mi>k</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> order attention mechanism. The features of each head are connected or averaged to get the final feature output <inline-formula id="ieqn-46"><mml:math id="mml-ieqn-46"><mml:mover><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:msup><mml:mi></mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:mrow></mml:msubsup><mml:mo stretchy="false">&#x2192;</mml:mo></mml:mover></mml:math></inline-formula>.</p>
<p>The graph data, especially when there is complex data noise in the meteorological data, will greatly impact the performance of GAT. However, the multi-head attention mechanism can make the GAT model&#x2019;s attention learning more reliable and stable, which can help notice the most important node in the graph and highlight the most important feature information.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>The Convolutional Neural Network CNN</title>
<p>CNN is a deep neural network on the basis of convolution operation with pooling, local connection, and weight sharing. It is widely used to automatically learn labeled data and extract complex features in data [<xref ref-type="bibr" rid="ref-11">11</xref>]. The structure of the one-dimensional CNN is displayed in <xref ref-type="fig" rid="fig-2">Fig. 2</xref>, which is mainly composed of two convolution layers, two pooling layers, and one fully connected layer. Features of the input data are extracted by the convolutional layer through scanning the convolution core. The pooling layer is utilized to sample the features that are extracted by the convolution layer, and to reduce network complexity while retaining the feature vector&#x2019;s main information; the fully-connected layer is to select the appropriate activation function for full connection, and the output activation value is the feature extracted by CNN.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>The structure of one-dimensional CNN</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-2.tif"/>
</fig>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>The Long Short-Term Memory LSTM</title>
<p>Based on recurrent neural network (RNN), the LSTM model has been improved and solved RNN&#x2019;s problem of being unable to effectively process long-distance information and being prone to gradient disappearance and explosion. Therefore, it is widely used in the analysis and processing of time series data. As shown in <xref ref-type="fig" rid="fig-3">Fig. 3</xref>, the unit structure of LSTM mainly contains the &#x201C;forget gate&#x201D;, the &#x201C;input gate&#x201D;, and the &#x201C;output gate&#x201D;, whose outputs are <inline-formula id="ieqn-47"><mml:math id="mml-ieqn-47"><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, <inline-formula id="ieqn-48"><mml:math id="mml-ieqn-48"><mml:msub><mml:mi>i</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-49"><mml:math id="mml-ieqn-49"><mml:msub><mml:mi>o</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, and can selectively retain or forget information, thus overcoming the adverse effects of short-term memory. The forget gate is to decide what information should be discarded or retained. The input gate is to update the cell state and selectively transmits the formerly hidden status information and new input information to the new LSTM cell, and the output gate is used to regulate the filtering of the unit cell status and determine information of the next hidden state. 
In addition, <inline-formula id="ieqn-50"><mml:math id="mml-ieqn-50"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the input of the LSTM unit, <inline-formula id="ieqn-51"><mml:math id="mml-ieqn-51"><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-52"><mml:math id="mml-ieqn-52"><mml:msub><mml:mi>c</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denote the cell unit states of the previous moment and the current moment, respectively, and <inline-formula id="ieqn-53"><mml:math id="mml-ieqn-53"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-54"><mml:math id="mml-ieqn-54"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denote the corresponding hidden layer states.</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>The unit structure of LSTM</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-3.tif"/>
</fig>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>The Wind Power Prediction Model Based on CGA-LSTM</title>
<sec id="s3_1">
<label>3.1</label>
<title>CGA-LSTM Model</title>
<p>As shown in <xref ref-type="fig" rid="fig-4">Fig. 4</xref>, the multi-wind farm wind power spatial-temporal prediction combined model CGA-LSTM consists of the input module, the spatial-temporal feature extraction module, the deep learning prediction module, and the output module. The input module includes the meteorological data of multiple wind fields <inline-formula id="ieqn-55"><mml:math id="mml-ieqn-55"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x22EF;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> whose temporal characteristics are extracted by CNN, and the wind power historical data <inline-formula id="ieqn-56"><mml:math id="mml-ieqn-56"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. The feature extraction and deep learning module are the core of the model, which jointly achieved accurate and reliable prediction of wind power. Through introducing the multi-head attention mechanism into GAT layers, the spatial-temporal feature extraction module mines the spatial-temporal characteristics of meteorological data from different wind farms to prepare for further wind power forecasting. The deep learning prediction module is composed of LSTM, which further achieves the effective correlation between the spatial-temporal characteristics of meteorological data and wind power data. The forecasting model based on CGA-LSTM realizes wind power characteristics mining through the above two modules, and obtains point and probability forecasting results through the output module. Next, the specific functions of each module are introduced, respectively.</p>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>The structure of CGA-LSTM</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-4.tif"/>
</fig>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Input Module</title>
<p>The model has two input modules, the meteorological data of n adjacent wind farms <inline-formula id="ieqn-57"><mml:math id="mml-ieqn-57"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>w</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> and the wind power historical data input <inline-formula id="ieqn-58"><mml:math id="mml-ieqn-58"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> stitched together with the spatial-temporal features extracted from GAT.</p>
<p>The meteorological data mainly include the wind speed, the wind direction, the atmospheric density, the humidity, and the temperature. The correlation and dimension between the meteorological data are different, and the time series features of wind power are not obvious. Therefore, to guarantee the more efficient spatial-temporal feature extraction of the subsequent GAT layer, an independent convolution-based feature extraction module is used for the meteorological data of each node to initially extract the high-dimensional dynamic time series feature of each node. The meteorological data of each node <inline-formula id="ieqn-59"><mml:math id="mml-ieqn-59"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is transformed into a one-dimensional time series feature <inline-formula id="ieqn-60"><mml:math id="mml-ieqn-60"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> through convolution, pooling, flattening, and other operations. The node data of the graph data is composed of n time series features. As the input of each graph network node, the time series feature <inline-formula id="ieqn-61"><mml:math id="mml-ieqn-61"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> of each node obtained by convolution constitutes an undirected fully connected graph, which is the input of the GAT network.</p>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>The Spatial-Temporal Feature Extraction Module</title>
<p>The spatial-temporal feature extraction module has three parallel independent GAT modules, and each GAT module consists of two GAT layers, which is to extract the spatial-temporal characteristics of the target node. The multi-head attention mechanism is utilized to make the prediction effect more stable and reliable. The fusion feature is obtained by adjusting the dimension of <inline-formula id="ieqn-62"><mml:math id="mml-ieqn-62"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-63"><mml:math id="mml-ieqn-63"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>.</p>
</sec>
<sec id="s3_1_3">
<label>3.1.3</label>
<title>Deep Learning Prediction Module</title>
<p>In the probability prediction module, LSTM is used to extract the fusion feature <inline-formula id="ieqn-64"><mml:math id="mml-ieqn-64"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. The results are processed by the multi-fully connected layer. The quantile loss function optimization training is received and the wind power&#x2019;s predicted value under diverse quantiles is finally output. The quantile loss function is:</p>
<p><disp-formula id="eqn-9"><label>(9)</label><mml:math id="mml-eqn-9" display="block"><mml:mo movablelimits="true" form="prefix">min</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>m</mml:mi><mml:mi>q</mml:mi></mml:mrow></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>&#x03C1;</mml:mi><mml:mrow><mml:mrow><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>W</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-65"><mml:math id="mml-ieqn-65"><mml:mi>m</mml:mi></mml:math></inline-formula> denotes the number of samples for each batch training set. <inline-formula id="ieqn-66"><mml:math id="mml-ieqn-66"><mml:mi>q</mml:mi></mml:math></inline-formula> is the number of quantiles. <inline-formula id="ieqn-67"><mml:math id="mml-ieqn-67"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents the real value of the output. <inline-formula id="ieqn-68"><mml:math id="mml-ieqn-68"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents the input of the sample. <inline-formula id="ieqn-69"><mml:math id="mml-ieqn-69"><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>W</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the predicted value output by the <inline-formula id="ieqn-70"><mml:math id="mml-ieqn-70"><mml:msup><mml:mi>i</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> sample of under the quantile condition <inline-formula id="ieqn-71"><mml:math id="mml-ieqn-71"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. 
<inline-formula id="ieqn-72"><mml:math id="mml-ieqn-72"><mml:mi>W</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> and <inline-formula id="ieqn-73"><mml:math id="mml-ieqn-73"><mml:mi>b</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> denote the model parameters related to the quantile <inline-formula id="ieqn-74"><mml:math id="mml-ieqn-74"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. <inline-formula id="ieqn-75"><mml:math id="mml-ieqn-75"><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the nonlinear function that reflects the connection between <inline-formula id="ieqn-76"><mml:math id="mml-ieqn-76"><mml:mi>X</mml:mi></mml:math></inline-formula> and <inline-formula id="ieqn-77"><mml:math id="mml-ieqn-77"><mml:mi>y</mml:mi></mml:math></inline-formula>. <inline-formula id="ieqn-78"><mml:math id="mml-ieqn-78"><mml:mi>&#x03C1;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the check function and its formula is as follows:</p>
<p><disp-formula id="eqn-10"><label>(10)</label><mml:math id="mml-eqn-10" display="block"><mml:msub><mml:mi>&#x03C1;</mml:mi><mml:mrow><mml:mi>&#x03C4;</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>I</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mspace width="1em" /><mml:mi>I</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03BC;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x2265;</mml:mo><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <italic>I</italic>(&#x22C5;) is the indicator function and <inline-formula id="ieqn-79"><mml:math id="mml-ieqn-79"><mml:mi>&#x03BC;</mml:mi></mml:math></inline-formula> is the input of the function.</p>
<p>The process of the probability prediction module can be calculated as:</p>
<p><disp-formula id="eqn-11"><label>(11)</label><mml:math id="mml-eqn-11" display="block"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mover><mml:mi>W</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mrow><mml:mover><mml:mi>b</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-80"><mml:math id="mml-ieqn-80"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> is the wind power predicted value at the quantile <inline-formula id="ieqn-81"><mml:math id="mml-ieqn-81"><mml:mi>&#x03C4;</mml:mi></mml:math></inline-formula>. <inline-formula id="ieqn-82"><mml:math id="mml-ieqn-82"><mml:mrow><mml:mover><mml:mi>W</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> and <inline-formula id="ieqn-83"><mml:math id="mml-ieqn-83"><mml:mi>b</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> are the network weight and deviation, respectively. <inline-formula id="ieqn-84"><mml:math id="mml-ieqn-84"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the fusion feature. And <inline-formula id="ieqn-85"><mml:math id="mml-ieqn-85"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> denotes the predicted output of wind power.</p>
</sec>
<sec id="s3_1_4">
<label>3.1.4</label>
<title>The Output Module</title>
<p>Based on the predicted values of different quantile conditions, the point and interval forecasting results can be obtained. The point prediction result is the predicted value <inline-formula id="ieqn-86"><mml:math id="mml-ieqn-86"><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mn>0.5</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> at the of 0.5. The interval prediction is <inline-formula id="ieqn-87"><mml:math id="mml-ieqn-87"><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>w</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo>&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>u</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>, wherein <inline-formula id="ieqn-88"><mml:math id="mml-ieqn-88"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>w</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-89"><mml:math id="mml-ieqn-89"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi>u</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represent the confidence interval lower and upper limits at the given confidence degree. The prediction results of different quantile conditions at a certain time can be seen as a random sample following a certain distribution. Therefore, KDE can fit all the quantile predicted values at that time to get the PDF of the wind power predicted value at that time.</p>
<p>The KDE expression is as follows:</p>
<p><disp-formula id="eqn-12"><label>(12)</label><mml:math id="mml-eqn-12" display="block"><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>z</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>n</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mi>K</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:mi>z</mml:mi><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mi>h</mml:mi></mml:mfrac><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-90"><mml:math id="mml-ieqn-90"><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>z</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> represents the estimated PDF. <inline-formula id="ieqn-91"><mml:math id="mml-ieqn-91"><mml:mi>n</mml:mi></mml:math></inline-formula> denotes the number of samples. <inline-formula id="ieqn-92"><mml:math id="mml-ieqn-92"><mml:mi>K</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> represents the kernel function. <inline-formula id="ieqn-93"><mml:math id="mml-ieqn-93"><mml:mi>h</mml:mi></mml:math></inline-formula> is the kernel function bandwidth. Epanechnikov is selected as the kernel function. In this paper, we select the grid search with cross-validation to obtain the suitable bandwidth.</p>
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Evaluation Indicators to Predict Model Performance</title>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Point Prediction Evaluation Indicators</title>
<p>To assess and compare the point prediction capability of the forecasting model, the root mean square error (RMSE) and normalized mean absolute percentage error (NMAPE) are adopted as the evaluation metrics with the formulas as:</p>
<p><disp-formula id="eqn-13"><label>(13)</label><mml:math id="mml-eqn-13" display="block"><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt></mml:math></disp-formula></p>
<p><disp-formula id="eqn-14"><label>(14)</label><mml:math id="mml-eqn-14" display="block"><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mrow><mml:mtext>NMAPE</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow><mml:mrow><mml:munderover><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mrow><mml:mi mathvariant="bold">y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn><mml:mi mathvariant="normal">&#x0025;</mml:mi></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-94"><mml:math id="mml-ieqn-94"><mml:mi>n</mml:mi></mml:math></inline-formula> denotes the quantity of test samples. <inline-formula id="ieqn-95"><mml:math id="mml-ieqn-95"><mml:mi>i</mml:mi></mml:math></inline-formula> denotes the sample serial number. <inline-formula id="ieqn-96"><mml:math id="mml-ieqn-96"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-97"><mml:math id="mml-ieqn-97"><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represent the real value and predicted value of the <inline-formula id="ieqn-98"><mml:math id="mml-ieqn-98"><mml:msup><mml:mi>i</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> sample output, respectively.</p>
<p>The model with the smaller RMSE and NMAPE values has higher point forecasting accuracy.</p>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Probabilistic Prediction Evaluation Indicators</title>
<p>To assess the effect of interval prediction models, we selected the average coverage error (ACE), the prediction interval normalized average width (PINAW), and the interval sharpness (IS) metrics for comparative analysis. The formula is:</p>
<p><disp-formula id="eqn-15"><label>(15)</label><mml:math id="mml-eqn-15" display="block"><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mrow><mml:mtext>ACE</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2209;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn><mml:mi mathvariant="normal">&#x0025;</mml:mi></mml:math></disp-formula></p>
<p><disp-formula id="eqn-16"><label>(16)</label><mml:math id="mml-eqn-16" display="block"><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mrow><mml:mtext>PINAW</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>n</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:math></disp-formula></p>
<p><disp-formula id="eqn-17"><label>(17)</label><mml:math id="mml-eqn-17" display="block"><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mrow><mml:mtext>IS</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left" rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mo>&#x2212;</mml:mo><mml:mn>2</mml:mn><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" 
/><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>&#x2212;</mml:mo><mml:mn>2</mml:mn><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>4</mml:mn><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mspace width="thinmathspace" /><mml:mspace width="thinmathspace" /><mml:mspace width="thinmathspace" /><mml:mspace width="thinmathspace" 
/><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x003C;</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>&#x2212;</mml:mo><mml:mn>2</mml:mn><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>4</mml:mn><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mspace width="thinmathspace" /><mml:mspace width="thinmathspace" /><mml:mspace width="thinmathspace" /><mml:mspace width="thinmathspace" /><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x003E;</mml:mo><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"></mml:mo></mml:mrow></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-99"><mml:math id="mml-ieqn-99"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula> represents the significance level. <inline-formula id="ieqn-100"><mml:math id="mml-ieqn-100"><mml:msubsup><mml:mi>u</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula id="ieqn-101"><mml:math id="mml-ieqn-101"><mml:msubsup><mml:mi>l</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> represent the upper and lower limits of the <inline-formula id="ieqn-102"><mml:math id="mml-ieqn-102"><mml:msup><mml:mi>i</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> sample prediction under confidence degree <inline-formula id="ieqn-103"><mml:math id="mml-ieqn-103"><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>. <inline-formula id="ieqn-104"><mml:math id="mml-ieqn-104"><mml:mi>s</mml:mi></mml:math></inline-formula> is the difference value of the actual maximum and minimum value.</p>
<p>A larger ACE indicates a larger coverage of the prediction interval at a certain significance level and a higher interval prediction reliability. Smaller PINAW indicates the narrower average width of the prediction interval obtained by the model. And higher IS indicates the better interval prediction comprehensive capability of the model.</p>
<p>In this paper, the continuously ranked probability score (CRPS) is employed to reflect the probability forecasting efficacy. And the formula is as follows:</p>
<p><disp-formula id="eqn-18"><label>(18)</label><mml:math id="mml-eqn-18" display="block"><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mrow><mml:mtext>CRPS</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mo>&#x222B;</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi mathvariant="normal">&#x221E;</mml:mi></mml:mrow><mml:mrow><mml:mo>+</mml:mo><mml:mi mathvariant="normal">&#x221E;</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mo>[</mml:mo><mml:mi>F</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>I</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mrow><mml:mtext>d</mml:mtext></mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula></p>
<p><disp-formula id="eqn-19"><label>(19)</label><mml:math id="mml-eqn-19" display="block"><mml:mi>F</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msubsup><mml:mo>&#x222B;</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi mathvariant="normal">&#x221E;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-105"><mml:math id="mml-ieqn-105"><mml:mi>i</mml:mi></mml:math></inline-formula> is the sample serial number. <inline-formula id="ieqn-106"><mml:math id="mml-ieqn-106"><mml:mi>F</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> represents the cumulative density function; <inline-formula id="ieqn-107"><mml:math id="mml-ieqn-107"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the <inline-formula id="ieqn-108"><mml:math id="mml-ieqn-108"><mml:mi>i</mml:mi></mml:math></inline-formula>th sample input. And <inline-formula id="ieqn-109"><mml:math id="mml-ieqn-109"><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> denotes the probability density function. Smaller CRPS indicates the better probability prediction comprehensive capability of the model.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>The Example Test</title>
<p>To confirm the proposed model&#x2019;s forecasting effect, the wind power historical data and meteorological data of Rock River Wind Farm from January 01, 2012 to December 31, 2012, including the wind speed, the wind direction, the temperature, and the humidity are adopted. The meteorological data of 10 adjacent wind farms are selected by the Pearson&#x2019;s correlation coefficient method. The data is 1 recording point per hour. Compared with other meteorological data, the correlation of wind speed and wind power data is the strongest. As shown in <xref ref-type="fig" rid="fig-5">Fig. 5</xref>, the higher the wind speed, the greater the corresponding wind power output, but the relationship between them is nonlinear, and when the wind speed reaches a certain point, the power output of the fan will tend to be stable. The relevant characteristics of other meteorological data and wind power are relatively complex, and the proposed method can learn the dynamic complex characteristics well to achieve the forecast target.</p>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>The target site wind speed and wind power data scatter plot</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-5.tif"/>
</fig>
<sec id="s4_1">
<label>4.1</label>
<title>The Input Data Normalization</title>
<p>To prevent the neuron saturation, the normalized input data is required. In this paper, the min-max normalization method is adopted with the expression as:</p>
<p><disp-formula id="eqn-20"><label>(20)</label><mml:math id="mml-eqn-20" display="block"><mml:mrow><mml:mover><mml:mi>x</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">min</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:math></disp-formula></p>
<p>wherein <inline-formula id="ieqn-110"><mml:math id="mml-ieqn-110"><mml:mrow><mml:mover><mml:mi>x</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> denotes the normalized data. <inline-formula id="ieqn-111"><mml:math id="mml-ieqn-111"><mml:mi>x</mml:mi></mml:math></inline-formula> denotes the original data. And <inline-formula id="ieqn-112"><mml:math id="mml-ieqn-112"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula id="ieqn-113"><mml:math id="mml-ieqn-113"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">min</mml:mo></mml:mrow></mml:msub></mml:math></inline-formula> denote the maximum and minimum value of the original data, respectively.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>The Model Parameter Setting</title>
<p>The proposed model sets 99 quantile points with the quantile point <inline-formula id="ieqn-114"><mml:math id="mml-ieqn-114"><mml:mi>&#x03C4;</mml:mi></mml:math></inline-formula> &#x003D; [0.01, 0.02, &#x2026;, 0.98, 0.99]. <xref ref-type="table" rid="table-1">Table 1</xref> displays the hyper-parameter settings of the CGA-LSTM model. To demonstrate the superior comprehensive spatial-temporal prediction performance of CGA-LSTM, GCN-LSTM, CNN-LSTM, CGA-MLP and LSTM are selected as the comparison models. GCN-LSTM could process the graph network data. It has the same structure set up as the CGA-LSTM but requires the input of the initial adjacent matrix. CNN-LSTM and LSTM cannot process the graph network data. Therefore, their input is only the meteorological datasets of the targeted wind farm and the structure is cascaded. The CGA-MLP model consists of a CGA module and a fully connected layer (MLP) as a measure of the necessity of LSTM networks. Next, the efficacy of the proposed model and others in the point prediction and probabilistic prediction are compared, respectively.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Hyper-parameter settings of CGA-LSTM</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead valign="top">
<tr>
<th>Layer</th>
<th>Parameter</th>
<th>Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2">CNN1, 2</td>
<td>Kernel size</td>
<td>2 &#x00D7; 2</td>
</tr>
<tr>
<td>Number of kernel</td>
<td>16, 32</td>
</tr>
<tr>
<td rowspan="2">GAT1, 2</td>
<td>Number of nodes</td>
<td>32, 64</td>
</tr>
<tr>
<td>Number of heads</td>
<td>4, 8</td>
</tr>
<tr>
<td>GAT1</td>
<td>Way of aggregation</td>
<td>Average</td>
</tr>
<tr>
<td>GAT2</td>
<td>Way of aggregation</td>
<td>Average</td>
</tr>
<tr>
<td>LSTM</td>
<td>Number of nodes</td>
<td>64</td>
</tr>
<tr>
<td>Dense</td>
<td>Number of nodes</td>
<td>128, 64</td>
</tr>
<tr>
<td>Dropout</td>
<td>Rate</td>
<td>0.2</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>The Analysis of Prediction Results</title>
<sec id="s4_3_1">
<label>4.3.1</label>
<title>The Analysis of the Point Prediction Effect</title>
<p>In the paper, the predicted value corresponding to 0.5 quantiles of each model is selected as the wind power point forecasting result. <xref ref-type="table" rid="table-2">Table 2</xref> shows the point prediction result error statistics for each model. As can be seen in <xref ref-type="table" rid="table-2">Table 2</xref>, RMSE and NMAPE of CGA-LSTM are the lowest. Compared with other models, RMSE decreases by 0.0741, 0.3035, 0.3732, and 0.3865 MW, respectively, and NMAPE decreases by 0.3133%, 1.1391%, 1.4554%, and 1.548%, respectively. <xref ref-type="table" rid="table-2">Table 2</xref> also indicates that each component of the model contributes to the overall performance and removing any component would lead to a significant drop in the performance. The prediction accuracy of the models using the graph network to extract spatial-temporal features is higher than those using the single wind farm data. Compared with the comparison models, the wind power prediction accuracy of CGA-LSTM is the highest, indicating that the proposed CGA module is effective in processing spatial-temporal features.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>The comparison of point forecasting results of different models</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead valign="top">
<tr>
<th>Model</th>
<th>RMSE/MW</th>
<th>NMAPE/%</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td><bold>CGA-LSTM</bold></td>
<td><bold>0.304</bold></td>
<td><bold>1.177</bold></td>
</tr>
<tr>
<td>GCN-LSTM</td>
<td>0.378</td>
<td>1.490</td>
</tr>
<tr>
<td>CNN-LSTM</td>
<td>0.608</td>
<td>2.316</td>
</tr>
<tr>
<td>CGA-MLP</td>
<td>0.677</td>
<td>2.632</td>
</tr>
<tr>
<td>LSTM</td>
<td>0.691</td>
<td>2.725</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="fig-6">Fig. 6</xref> is the comparison between the predicted value of each model and the actual value of wind power. As displayed in <xref ref-type="fig" rid="fig-6">Fig. 6</xref>, relying on the performance of deep learning, each model has good point prediction performance. However, by comparing the randomly selected magnified sub-graphs, it can be seen more clearly that the predicted value of the CGA-LSTM model is closest to the real value. To summarize, the CGA-LSTM model has better point forecasting performance of short-term wind power.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>Point forecasting results of diverse models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-6.tif"/>
</fig>
</sec>
<sec id="s4_3_2">
<label>4.3.2</label>
<title>The Analysis of Probability Prediction Effect</title>
<p>The forecasting results at diverse quantiles are gained from CGA-LSTM by using QR. The KDE method was utilized to fit the PDF for each observation point, and <xref ref-type="fig" rid="fig-7">Fig. 7</xref> displays the PDF of 4 randomly selected observation points. As can be seen from <xref ref-type="fig" rid="fig-7">Fig. 7</xref>, most of the real values approach the PDF peak and the predicted median, which indicates the proposed probability forecasting model is efficient.</p>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>The PDF curve at different observation points predicted by CGA-LSTM</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-7a.tif"/><graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-7b.tif"/>
</fig>
<p>The error statistics of each model&#x2019;s probabilistic forecasting are shown in <xref ref-type="table" rid="table-3">Table 3</xref>. It can be concluded that:</p>
<table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>Comparison of probabilistic prediction results of different models</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead valign="top">
<tr>
<th>Model</th>
<th>PINAW</th>
<th>ACE/%</th>
<th>IS/MW</th>
<th>CRPS</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td><bold>CGA-LSTM</bold></td>
<td><bold>0.078</bold></td>
<td><bold>2.291</bold></td>
<td><bold>&#x2212;0.172</bold></td>
<td><bold>0.580</bold></td>
</tr>
<tr>
<td>GCN-LSTM</td>
<td>0.118</td>
<td>4.791</td>
<td>&#x2212;0.240</td>
<td>0.669</td>
</tr>
<tr>
<td>CNN-LSTM</td>
<td>0.153</td>
<td>0.833</td>
<td>&#x2212;0.337</td>
<td>0.710</td>
</tr>
<tr>
<td>LSTM</td>
<td>0.129</td>
<td>0</td>
<td>&#x2212;0.375</td>
<td>0.743</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>First, the ACE average value of CGA-LSTM and all contrast models is no less than 0, indicating that the models&#x2019; prediction interval meets 95% confidence.</p>
<p>The IS and CRPS values of CGA-LSTM are the lowest. The IS absolute value of CGA-LSTM decreases by 0.0674, 0.1649, and 0.2027, respectively. In comparison with other models, the CRPS value of CGA-LSTM decreases by 0.0886, 0.1295, and 0.1631, respectively. It means that CGA-LSTM has narrower prediction intervals and higher acuity, interval prediction comprehensive performance, and probabilistic prediction performance. Based on the above analysis, the CGA-LSTM model proposed in this paper is effective in using spatial-temporal features for probabilistic prediction.</p>
<p>In order to illustrate that CGA-LSTM proposed in this work can better describe the uncertainty of wind power prediction, <xref ref-type="fig" rid="fig-8">Fig. 8</xref> shows the interval forecasting results of diverse models in the same period. It is obvious that the actual value of CGA-LSTM almost all falls within its prediction interval, which indicates that the performance of CGA-LSTM in interval prediction is very reliable. In addition, the figure shows that the proposed model is narrower in the prediction interval compared with contrast models, indicating that the proposed model CGA-LSTM has relatively high interval prediction acuity.</p>
<fig id="fig-8">
<label>Figure 8</label>
<caption>
<title>Interval forecasting results of diverse models</title>
</caption>
<graphic mimetype="image" mime-subtype="tif" xlink:href="EE_40887-fig-8.tif"/>
</fig>
<p>For the purpose of further illustrating the proposed method prediction effectiveness, we also select three methods for comparative experiments, namely SATCN-LSTM [<xref ref-type="bibr" rid="ref-36">36</xref>], TAC-BiLSTM [<xref ref-type="bibr" rid="ref-37">37</xref>] and SANN [<xref ref-type="bibr" rid="ref-38">38</xref>]. The evaluation indexes of point prediction and interval prediction results for the 90% confidence interval of different models are shown in <xref ref-type="table" rid="table-4">Table 4</xref>. It can be seen that the proposed model obtains optimal results on each evaluation index. For point prediction, compared with other models, RMSE and NMAPE of CGA-LSTM are the lowest, which are reduced by 11.8% and 1.1% on average, respectively, which reflects that the feature extraction model CGA can give full play to its own performance, and actively enhance the point prediction accuracy. For interval prediction, under the 90% confidence level, although the absolute value of ACE of CGA-LSTM is not the smallest, PINAW and the absolute value of IS are both the smallest, indicating that the prediction interval generated by this method has good reliability, higher sensitivity and better comprehensive performance. In summary, the method of this paper has certain advantages over other selected methods, especially in the scenario of multiple wind field data, where the proposed model can fully consider the coupling relationship between wind power and meteorological factors, and obtain better prediction results.</p>
<table-wrap id="table-4">
<label>Table 4</label>
<caption>
<title>Comparison of point prediction and interval prediction results of different methods</title>
</caption>
<table frame="hsides">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead valign="top">
<tr>
<th>Method</th>
<th>RMSE/MW</th>
<th>NMAPE/%</th>
<th>PINAW</th>
<th>ACE/%</th>
<th>IS/MW</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td><bold>CGA-LSTM</bold></td>
<td><bold>0.304</bold></td>
<td><bold>1.177</bold></td>
<td><bold>0.078</bold></td>
<td><bold>2.291</bold></td>
<td><bold>&#x2212;0.172</bold></td>
</tr>
<tr>
<td>SATCN-LSTM [<xref ref-type="bibr" rid="ref-36">36</xref>]</td>
<td>0.354</td>
<td>1.201</td>
<td>0.078</td>
<td>1.239</td>
<td>&#x2212;0.195</td>
</tr>
<tr>
<td>TAC-BiLSTM [<xref ref-type="bibr" rid="ref-37">37</xref>]</td>
<td>0.355</td>
<td>1.183</td>
<td>0.096</td>
<td>2.354</td>
<td>&#x2212;0.209</td>
</tr>
<tr>
<td>SANN [<xref ref-type="bibr" rid="ref-38">38</xref>]</td>
<td>0.327</td>
<td>1.186</td>
<td>0.102</td>
<td>1.859</td>
<td>&#x2212;0.185</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Conclusion</title>
<p>In this paper, a wind power prediction model of convolution graph attention long short-term memory model based on multi-wind farm spatial-temporal data, namely CGA-LSTM, has been proposed. Firstly, CNN is used to primarily extract the high-dimensional characteristics of the meteorological data at each node. Then, the multi-layer attention network is used to extract the spatial-temporal characteristics of the graph data and LSTM is combined to form the graph attention deep neural network CGA-LSTM, which can effectively extract the spatial-temporal characteristics of the multi-wind farm meteorological data and realize the deterministic and probabilistic prediction of wind power using multi-wind farm meteorological data. Compared with traditional models such as CNN-LSTM, the proposed model has a higher prediction capability. The proposed model has only one target node, that is, the multi-wind farm meteorological data is used to predict the wind power of only one wind farm. In future research, multi-task models can be constructed to predict and model multiple adjacent wind farms simultaneously and share the same graph network information.</p>
</sec>
</body>
<back>
<ack>
<p>Not applicable.</p>
</ack>
<sec><title>Funding Statement</title>
<p>This work was supported by the Science and Technology Project of State Grid Corporation of China (4000-202122070A-0-0-00).</p>
</sec>
<sec><title>Author Contributions</title>
<p>The authors confirm contribution to the paper as follows: study conception and design: Fan Xiao, Ping Xiong; data collection: Chang Ye; analysis and interpretation of results: Yusen Xu, Yeyang Li, Yiqun Kang; draft manuscript preparation: Dan Liu, Nianming Zhang. All authors reviewed the results and approved the final version of the manuscript.</p>
</sec>
<sec sec-type="data-availability"><title>Availability of Data and Materials</title>
<p>Due to some confidentiality and intellectual property issues, we are not able to provide relevant data.</p>
</sec>
<sec sec-type="COI-statement"><title>Conflicts of Interest</title>
<p>The authors declare that they have no conflicts of interest to report regarding the present study.</p>
</sec>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>1.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><collab>Global Wind Energy Council (GWEC)</collab></person-group> (<year>2022</year>). <article-title>Global wind report 2022</article-title>. <ext-link ext-link-type="uri" xlink:href="https://gwec.net/global-wind-report-2022">https://gwec.net/global-wind-report-2022</ext-link> <comment>(accessed on 16/10/2023)</comment></mixed-citation></ref>
<ref id="ref-2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Jin</surname>, <given-names>H. P.</given-names></string-name>, <string-name><surname>Shi</surname>, <given-names>L. X.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>X. G.</given-names></string-name>, <string-name><surname>Qian</surname>, <given-names>B.</given-names></string-name>, <string-name><surname>Yang</surname>, <given-names>B.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2021</year>). <article-title>Probabilistic wind power forecasting using selective ensemble of finite mixture Gaussian process regression models</article-title>. <source>Renewable Energy</source><italic>,</italic> <volume>174</volume><italic>,</italic> <fpage>1</fpage>&#x2013;<lpage>18</lpage>.</mixed-citation></ref>
<ref id="ref-3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhou</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>B.</given-names></string-name>, <string-name><surname>Guo</surname>, <given-names>S. D.</given-names></string-name>, <string-name><surname>Watada</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Multi-objective prediction intervals for wind power forecast based on deep neural networks</article-title>. <source>Information Sciences</source><italic>,</italic> <volume>550</volume><italic>,</italic> <fpage>207</fpage>&#x2013;<lpage>220</lpage>.</mixed-citation></ref>
<ref id="ref-4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Erdem</surname>, <given-names>E.</given-names></string-name>, <string-name><surname>Shi</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2011</year>). <article-title>ARMA based approaches for forecasting the tuple of wind speed and direction</article-title>. <source>Applied Energy</source><italic>,</italic> <volume>88</volume><italic>(</italic><issue>4</issue><italic>),</italic> <fpage>1405</fpage>&#x2013;<lpage>1414</lpage>.</mixed-citation></ref>
<ref id="ref-5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Amini</surname>, <given-names>M. H.</given-names></string-name>, <string-name><surname>Kargarian</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Karabasoglu</surname>, <given-names>O.</given-names></string-name></person-group> (<year>2016</year>). <article-title>ARIMA-based decoupled time series forecasting of electric vehicle charging demand for stochastic power system operation</article-title>. <source>Electric Power Systems Research</source><italic>,</italic> <volume>140</volume><italic>,</italic> <fpage>378</fpage>&#x2013;<lpage>390</lpage>.</mixed-citation></ref>
<ref id="ref-6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ren</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>An</surname>, <given-names>N.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>J. Z.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>L.</given-names></string-name>, <string-name><surname>Hu</surname>, <given-names>B.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2014</year>). <article-title>Optimal parameters selection for BP neural network based on particle swarm optimization: A case study of wind speed forecasting</article-title>. <source>Knowledge-Based Systems</source><italic>,</italic> <volume>56</volume><italic>,</italic> <fpage>226</fpage>&#x2013;<lpage>239</lpage>.</mixed-citation></ref>
<ref id="ref-7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Yu</surname>, <given-names>C. J.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>Y. L.</given-names></string-name>, <string-name><surname>Bao</surname>, <given-names>Y. L.</given-names></string-name>, <string-name><surname>Tang</surname>, <given-names>H. J.</given-names></string-name>, <string-name><surname>Zhai</surname>, <given-names>G. H.</given-names></string-name></person-group> (<year>2018</year>). <article-title>A novel framework for wind speed prediction based on recurrent neural networks and support vector machine</article-title>. <source>Energy Conversion and Management</source><italic>,</italic> <volume>178</volume><italic>,</italic> <fpage>137</fpage>&#x2013;<lpage>145</lpage>.</mixed-citation></ref>
<ref id="ref-8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Demolli</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Dokuz</surname>, <given-names>A. S.</given-names></string-name>, <string-name><surname>Ecemis</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Gokcek</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Wind power forecasting based on daily wind speed data using machine learning algorithms</article-title>. <source>Energy Conversion and Management</source><italic>,</italic> <volume>198</volume><italic>,</italic> <fpage>111823</fpage>.</mixed-citation></ref>
<ref id="ref-9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zang</surname>, <given-names>H. X.</given-names></string-name>, <string-name><surname>Cheng</surname>, <given-names>L. L.</given-names></string-name>, <string-name><surname>Ding</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Cheung</surname>, <given-names>K. W.</given-names></string-name>, <string-name><surname>Wei</surname>, <given-names>Z. N.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2020</year>). <article-title>Day-ahead photovoltaic power forecasting approach based on deep convolutional neural networks and meta learning</article-title>. <source>International Journal of Electrical Power &#x0026; Energy Systems</source><italic>,</italic> <volume>118</volume><italic>,</italic> <fpage>105790</fpage>.</mixed-citation></ref>
<ref id="ref-10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Oh</surname>, <given-names>B. K.</given-names></string-name>, <string-name><surname>Glisic</surname>, <given-names>B.</given-names></string-name>, <string-name><surname>Kim</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Park</surname>, <given-names>H. S.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Convolutional neural network-based wind-induced response estimation model for tall buildings</article-title>. <source>Computer-Aided Civil and Infrastructure Engineering</source><italic>,</italic> <volume>34</volume><italic>,</italic> <fpage>843</fpage>&#x2013;<lpage>858</lpage>.</mixed-citation></ref>
<ref id="ref-11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Huang</surname>, <given-names>R. M.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>X. H.</given-names></string-name>, <string-name><surname>Fei</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>H. E.</given-names></string-name>, <string-name><surname>Wu</surname>, <given-names>E. Q.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Forecast method of distributed photovoltaic power generation based on EM-WS-CNN neural networks</article-title>. <source>Frontiers in Energy Research</source><italic>,</italic> <volume>10</volume><italic>,</italic> <fpage>902722</fpage>.</mixed-citation></ref>
<ref id="ref-12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wang</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Xuan</surname>, <given-names>Z. M.</given-names></string-name>, <string-name><surname>Zhen</surname>, <given-names>Z.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>K. P.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>T. Q.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2020</year>). <article-title>A day-ahead PV power forecasting method based on LSTM-RNN model and time correlation modification under partial daily pattern prediction framework</article-title>. <source>Energy Conversion and Management</source><italic>,</italic> <volume>212</volume><italic>,</italic> <fpage>112766</fpage>.</mixed-citation></ref>
<ref id="ref-13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Yuan</surname>, <given-names>X. H.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Jiang</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Yuan</surname>, <given-names>Y. B.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Prediction interval of wind power using parameter optimized Beta distribution based LSTM model</article-title>. <source>Applied Soft Computing</source><italic>,</italic> <volume>82</volume><italic>,</italic> <fpage>105550</fpage>.</mixed-citation></ref>
<ref id="ref-14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Kisvari</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Lin</surname>, <given-names>Z.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>X. L.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Wind power forecasting&#x2013;A data-driven method along with gated recurrent neural network</article-title>. <source>Renewable Energy</source><italic>,</italic> <volume>163</volume><italic>,</italic> <fpage>1895</fpage>&#x2013;<lpage>1909</lpage>.</mixed-citation></ref>
<ref id="ref-15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Niu</surname>, <given-names>Z. W.</given-names></string-name>, <string-name><surname>Yu</surname>, <given-names>Z. Y.</given-names></string-name>, <string-name><surname>Tang</surname>, <given-names>W. H.</given-names></string-name>, <string-name><surname>Wu</surname>, <given-names>Q. H.</given-names></string-name>, <string-name><surname>Reformat</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2020</year>). <article-title>Wind power forecasting using attention-based gated recurrent unit network</article-title>. <source>Energy</source><italic>,</italic> <volume>196</volume><italic>,</italic> <fpage>117081</fpage>.</mixed-citation></ref>
<ref id="ref-16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wang</surname>, <given-names>Y. Y.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>T. Y.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>X. Q.</given-names></string-name>, <string-name><surname>Zeng</surname>, <given-names>X. J.</given-names></string-name>, <string-name><surname>Huang</surname>, <given-names>J. J.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2022</year>). <article-title>Short-term probability density function forecasting of industrial loads based on ConvLSTM-MDN</article-title>. <source>Frontiers in Energy Research</source><italic>,</italic> <volume>10</volume><italic>,</italic> <fpage>891680</fpage>.</mixed-citation></ref>
<ref id="ref-17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wu</surname>, <given-names>Y. K.</given-names></string-name>, <string-name><surname>Wu</surname>, <given-names>Y. C.</given-names></string-name>, <string-name><surname>Hong</surname>, <given-names>J. S.</given-names></string-name>, <string-name><surname>Phan</surname>, <given-names>L. H.</given-names></string-name>, <string-name><surname>Phan</surname>, <given-names>Q. D.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Probabilistic forecast of wind power generation with data processing and numerical weather predictions</article-title>. <source>IEEE Transactions on Industry Applications</source><italic>,</italic> <volume>57</volume><italic>(</italic><issue>1</issue><italic>),</italic> <fpage>36</fpage>&#x2013;<lpage>45</lpage>.</mixed-citation></ref>
<ref id="ref-18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Yin</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Ou</surname>, <given-names>Z. H.</given-names></string-name>, <string-name><surname>Fu</surname>, <given-names>J. J.</given-names></string-name>, <string-name><surname>Cai</surname>, <given-names>Y. F.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>S.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2021</year>). <article-title>A novel transfer learning approach for wind power prediction based on a serio-parallel deep learning architecture</article-title>. <source>Energy</source><italic>,</italic> <volume>234</volume><italic>,</italic> <fpage>121271</fpage>.</mixed-citation></ref>
<ref id="ref-19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wang</surname>, <given-names>H. Z.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>G. Q.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>G. B.</given-names></string-name>, <string-name><surname>Peng</surname>, <given-names>J. C.</given-names></string-name>, <string-name><surname>Jiang</surname>, <given-names>H.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2017</year>). <article-title>Deep learning based ensemble approach for probabilistic wind power forecasting</article-title>. <source>Applied Energy</source><italic>,</italic> <volume>188</volume><italic>,</italic> <fpage>56</fpage>&#x2013;<lpage>70</lpage>.</mixed-citation></ref>
<ref id="ref-20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Mi</surname>, <given-names>X.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>Y.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Wind speed prediction model using singular spectrum analysis, empirical mode decomposition and convolutional support vector machine</article-title>. <source>Energy Conversion and Management</source><italic>,</italic> <volume>180</volume><italic>,</italic> <fpage>196</fpage>&#x2013;<lpage>205</lpage>.</mixed-citation></ref>
<ref id="ref-21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zang</surname>, <given-names>H. X.</given-names></string-name>, <string-name><surname>Xu</surname>, <given-names>R. Q.</given-names></string-name>, <string-name><surname>Cheng</surname>, <given-names>L. L.</given-names></string-name>, <string-name><surname>Ding</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>L.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2021</year>). <article-title>Residential load forecasting based on LSTM fusing self-attention mechanism with pooling</article-title>. <source>Energy</source><italic>,</italic> <volume>229</volume><italic>,</italic> <fpage>120682</fpage>.</mixed-citation></ref>
<ref id="ref-22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Chen</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>W. Y.</given-names></string-name>, <string-name><surname>Peng</surname>, <given-names>J. J.</given-names></string-name>, <string-name><surname>Cai</surname>, <given-names>Y. S.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Multifactor spatio-temporal correlation model based on a combination of convolutional neural network and long short-term memory neural network for wind speed forecasting</article-title>. <source>Energy Conversion and Management</source><italic>,</italic> <volume>185</volume><italic>,</italic> <fpage>783</fpage>&#x2013;<lpage>799</lpage>.</mixed-citation></ref>
<ref id="ref-23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Cheng</surname>, <given-names>L. L.</given-names></string-name>, <string-name><surname>Zang</surname>, <given-names>H. X.</given-names></string-name>, <string-name><surname>Wei</surname>, <given-names>Z. N.</given-names></string-name>, <string-name><surname>Ding</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Xu</surname>, <given-names>R. Q.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2022</year>). <article-title>Short-term solar photovoltaic power prediction learning directly from satellite images with regions of interest</article-title>. <source>IEEE Transactions on Sustainable Energy</source><italic>,</italic> <volume>13</volume><italic>(</italic><issue>1</issue><italic>),</italic> <fpage>629</fpage>&#x2013;<lpage>639</lpage>.</mixed-citation></ref>
<ref id="ref-24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wang</surname>, <given-names>Z. J.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Huang</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>L.</given-names></string-name></person-group> (<year>2020</year>). <article-title>Short-term wind speed forecasting based on information of neighboring wind farms</article-title>. <source>IEEE Access</source><italic>,</italic> <volume>8</volume><italic>,</italic> <fpage>16760</fpage>&#x2013;<lpage>16770</lpage>.</mixed-citation></ref>
<ref id="ref-25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Scarselli</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Gori</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Tsoi</surname>, <given-names>A. C.</given-names></string-name>, <string-name><surname>Hagenbuchner</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Monfardini</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2009</year>). <article-title>The graph neural network model</article-title>. <source>IEEE Transactions on Neural Networks</source><italic>,</italic> <volume>20</volume><italic>(</italic><issue>1</issue><italic>),</italic> <fpage>61</fpage>&#x2013;<lpage>80</lpage>; <pub-id pub-id-type="pmid">19068426</pub-id></mixed-citation></ref>
<ref id="ref-26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Khodayar</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Spatio-temporal graph deep neural network for short-term wind speed forecasting</article-title>. <source>IEEE Transactions on Sustainable Energy</source><italic>,</italic> <volume>10</volume><italic>(</italic><issue>2</issue><italic>),</italic> <fpage>670</fpage>&#x2013;<lpage>681</lpage>.</mixed-citation></ref>
<ref id="ref-27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Geng</surname>, <given-names>X. L.</given-names></string-name>, <string-name><surname>Xu</surname>, <given-names>L. Y.</given-names></string-name>, <string-name><surname>He</surname>, <given-names>X. Y.</given-names></string-name>, <string-name><surname>Yu</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Graph optimization neural network with spatio-temporal correlation learning for multi-node offshore wind speed forecasting</article-title>. <source>Renewable Energy</source><italic>,</italic> <volume>180</volume><italic>,</italic> <fpage>1014</fpage>&#x2013;<lpage>1025</lpage>.</mixed-citation></ref>
<ref id="ref-28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wu</surname>, <given-names>Z. H.</given-names></string-name>, <string-name><surname>Pan</surname>, <given-names>S. R.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>F. W.</given-names></string-name>, <string-name><surname>Long</surname>, <given-names>G. D.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>C. Q.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2021</year>). <article-title>A comprehensive survey on graph neural networks</article-title>. <source>IEEE Transactions on Neural Networks and Learning Systems</source><italic>,</italic> <volume>32</volume><italic>(</italic><issue>1</issue><italic>),</italic> <fpage>4</fpage>&#x2013;<lpage>24</lpage>; <pub-id pub-id-type="pmid">32217482</pub-id></mixed-citation></ref>
<ref id="ref-29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Chen</surname>, <given-names>S. H.</given-names></string-name>, <string-name><surname>Varma</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Sandryhaila</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Kovacevic</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2015</year>). <article-title>Discrete signal processing on graphs: Sampling theory</article-title>. <source>IEEE Transactions on Signal Processing</source><italic>,</italic> <volume>63</volume><italic>(</italic><issue>24</issue><italic>),</italic> <fpage>6510</fpage>&#x2013;<lpage>6523</lpage>.</mixed-citation></ref>
<ref id="ref-30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Niu</surname>, <given-names>D. X.</given-names></string-name>, <string-name><surname>Sun</surname>, <given-names>L. J.</given-names></string-name>, <string-name><surname>Yu</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>K. K.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Point and interval forecasting of ultra-short-term wind power based on a data-driven method and hybrid deep learning model</article-title>. <source>Energy</source><italic>,</italic> <volume>254</volume><italic>,</italic> <fpage>124384</fpage>.</mixed-citation></ref>
<ref id="ref-31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhang</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Yan</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>Y. Q.</given-names></string-name>, <string-name><surname>Gao</surname>, <given-names>Y. Q.</given-names></string-name>, <string-name><surname>Han</surname>, <given-names>S.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2021</year>). <article-title>Multi-source and temporal attention network for probabilistic wind power prediction</article-title>. <source>IEEE Transactions on Sustainable Energy</source><italic>,</italic> <volume>12</volume><italic>(</italic><issue>4</issue><italic>),</italic> <fpage>2205</fpage>&#x2013;<lpage>2218</lpage>.</mixed-citation></ref>
<ref id="ref-32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Sun</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>B. J.</given-names></string-name>, <string-name><surname>Hu</surname>, <given-names>W. H.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>Z. Y.</given-names></string-name>, <string-name><surname>Shi</surname>, <given-names>C. Y.</given-names></string-name></person-group> (<year>2022</year>). <article-title>A new framework for short-term wind power probability forecasting considering spatial and temporal dependence of forecast errors</article-title>. <source>Frontiers in Energy Research</source><italic>,</italic> <volume>10</volume><italic>,</italic> <fpage>990989</fpage>.</mixed-citation></ref>
<ref id="ref-33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Wang</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Zou</surname>, <given-names>R. M.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>L. J.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>Q. Y.</given-names></string-name></person-group> (<year>2021</year>). <article-title>A review of wind speed and wind power forecasting with deep neural networks</article-title>. <source>Applied Energy</source><italic>,</italic> <volume>304</volume><italic>,</italic> <fpage>117766</fpage>.</mixed-citation></ref>
<ref id="ref-34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Peng</surname>, <given-names>X. S.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>H. Y.</given-names></string-name>, <string-name><surname>Lang</surname>, <given-names>J. X.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>W. Z.</given-names></string-name>, <string-name><surname>Xu</surname>, <given-names>Q. Y.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2021</year>). <article-title>EALSTM-QR: Interval wind-power prediction model based on numerical weather prediction and deep learning</article-title>. <source>Energy</source><italic>,</italic> <volume>220</volume><italic>,</italic> <fpage>119692</fpage>.</mixed-citation></ref>
<ref id="ref-35"><label>35.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Zhang</surname>, <given-names>Z. D.</given-names></string-name>, <string-name><surname>Qin</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>Y. Q.</given-names></string-name>, <string-name><surname>Yao</surname>, <given-names>L. Q.</given-names></string-name>, <string-name><surname>Yu</surname>, <given-names>X.</given-names></string-name> <etal>et al.</etal></person-group> (<year>2019</year>). <article-title>Wind speed forecasting based on quantile regression minimal gated memory network and kernel density estimation</article-title>. <source>Energy Conversion and Management</source><italic>,</italic> <volume>196</volume><italic>,</italic> <fpage>1395</fpage>&#x2013;<lpage>1409</lpage>.</mixed-citation></ref>
<ref id="ref-36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Xiang</surname>, <given-names>L.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>J. N.</given-names></string-name>, <string-name><surname>Yang</surname>, <given-names>X.</given-names></string-name>, <string-name><surname>Hu</surname>, <given-names>A. J.</given-names></string-name>, <string-name><surname>Su</surname>, <given-names>H.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Ultra-short term wind power prediction applying a novel model named SATCN-LSTM</article-title>. <source>Energy Conversion and Management</source><italic>,</italic> <volume>252</volume><italic>,</italic> <fpage>115036</fpage>.</mixed-citation></ref>
<ref id="ref-37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Ma</surname>, <given-names>Z. J.</given-names></string-name>, <string-name><surname>Mei</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2022</year>). <article-title>A hybrid attention-based deep learning approach for wind power prediction</article-title>. <source>Applied Energy</source><italic>,</italic> <volume>323</volume><italic>,</italic> <fpage>119608</fpage>.</mixed-citation></ref>
<ref id="ref-38"><label>38.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><surname>Dai</surname>, <given-names>X. R.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>G. P.</given-names></string-name>, <string-name><surname>Hu</surname>, <given-names>W. S.</given-names></string-name></person-group> (<year>2023</year>). <article-title>An online-learning-enabled self-attention-based model for ultra-short-term wind power forecasting</article-title>. <source>Energy</source><italic>,</italic> <volume>272</volume><italic>,</italic> <fpage>127173</fpage>.</mixed-citation></ref>
</ref-list>
</back></article>