<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">CMC</journal-id>
<journal-id journal-id-type="nlm-ta">CMC</journal-id>
<journal-id journal-id-type="publisher-id">CMC</journal-id>
<journal-title-group>
<journal-title>Computers, Materials &#x0026; Continua</journal-title>
</journal-title-group>
<issn pub-type="epub">1546-2226</issn>
<issn pub-type="ppub">1546-2218</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">16727</article-id>
<article-id pub-id-type="doi">10.32604/cmc.2021.016727</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Suggestion Mining from Opinionated Text of Big Social Media Data</article-title>
<alt-title alt-title-type="left-running-head">Suggestion Mining from Opinionated Text of Big Social Media Data</alt-title>
<alt-title alt-title-type="right-running-head">Suggestion Mining from Opinionated Text of Big Social Media Data</alt-title>
</title-group>
<contrib-group content-type="authors">
<contrib id="author-1" contrib-type="author" corresp="yes">
<name name-style="western">
<surname>Alotaibi</surname>
<given-names>Youseef</given-names>
</name>
<xref ref-type="aff" rid="aff-1">1</xref>
</contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western">
<surname>Malik</surname>
<given-names>Muhammad Noman</given-names>
</name>
<xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western">
<surname>Khan</surname>
<given-names>Huma Hayat</given-names>
</name>
<xref ref-type="aff" rid="aff-3">3</xref></contrib>
<contrib id="author-4" contrib-type="author">
<name name-style="western">
<surname>Batool</surname>
<given-names>Anab</given-names>
</name>
<xref ref-type="aff" rid="aff-2">2</xref></contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western">
<surname>Islam</surname>
<given-names>Saif ul</given-names>
</name>
<xref ref-type="aff" rid="aff-4">4</xref></contrib>
<contrib id="author-6" contrib-type="author">
<name name-style="western">
<surname>Alsufyani</surname>
<given-names>Abdulmajeed</given-names>
</name>
<xref ref-type="aff" rid="aff-5">5</xref></contrib>
<contrib id="author-7" contrib-type="author">
<name name-style="western">
<surname>Alghamdi</surname>
<given-names>Saleh</given-names>
</name>
<xref ref-type="aff" rid="aff-6">6</xref></contrib>
<aff id="aff-1"><label>1</label><institution>Department of Computer Science, College of Computer and Information Systems, Umm Al-Qura University</institution>, <country>Saudi Arabia</country></aff>
<aff id="aff-2"><label>2</label><institution>Department of Computer Science, Faculty of Engineering and Computer Sciences, National University of Modern Languages</institution>, <addr-line>Islamabad</addr-line>, <country>Pakistan</country></aff>
<aff id="aff-3"><label>3</label><institution>Department of Software Engineering, Faculty of Engineering and Computer Sciences, National University of Modern Languages</institution>, <addr-line>Islamabad</addr-line>, <country>Pakistan</country></aff>
<aff id="aff-4"><label>4</label><institution>Department of Computer Sciences, Institute of Space Technology</institution>, <addr-line>Islamabad</addr-line>, <country>Pakistan</country></aff>
<aff id="aff-5"><label>5</label><institution>Department of Computer Science, College of Computers and Information Technology, Taif University</institution>, <addr-line>Taif, 21944</addr-line>, <country>Saudi Arabia</country></aff>
<aff id="aff-6"><label>6</label><institution>Department of Information Technology, College of Computers and Information Technology, Taif University</institution>, <addr-line>Taif</addr-line>, <country>Saudi Arabia</country></aff>
</contrib-group>
<author-notes><corresp id="cor1">&#x002A;Corresponding Author: Youseef Alotaibi. Email: <email>yaotaibi@uqu.edu.sa</email></corresp></author-notes>
<pub-date pub-type="epub" date-type="pub" iso-8601-date="2021-03-22"><day>22</day><month>03</month><year>2021</year>
</pub-date>
<volume>68</volume>
<issue>3</issue>
<fpage>3323</fpage>
<lpage>3338</lpage>
<history>
<date date-type="received"><day>09</day><month>01</month><year>2021</year></date>
<date date-type="accepted"><day>15</day><month>03</month><year>2021</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2021 Alotaibi et al.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Alotaibi et al.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_CMC_16727.pdf"></self-uri>
<abstract>
<p>Social media data are rapidly increasing and constitute a source of user opinions and tips on a wide range of products and services. The increasing availability of such big data on biased reviews and blogs creates challenges for customers and businesses in reviewing all content in their decision-making process. To overcome this challenge, extracting suggestions from opinionated text is a possible solution. In this study, the characteristics of suggestions are analyzed and a suggestion mining extraction process is presented for classifying suggestive sentences from online customers&#x2019; reviews. A classification using a word-embedding approach is used via the XGBoost classifier. The two datasets used in this experiment relate to online hotel reviews and Microsoft Windows App Studio discussion reviews. F1, precision, recall, and accuracy scores are calculated. The results demonstrated that the XGBoost classifier outperforms&#x2014;with an accuracy of more than 80%. Moreover, the results revealed that suggestion keywords and phrases are the predominant features for suggestion extraction. Thus, this study contributes to knowledge and practice by comparing feature extraction classifiers and identifying XGBoost as a better suggestion mining process for identifying online reviews.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>Suggestion mining</kwd>
<kwd>word embedding</kwd>
<kwd>Na&#x00EF;ve Bayes</kwd>
<kwd>random forest</kwd>
<kwd>XGBoost</kwd>
<kwd>dataset</kwd></kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>Online texts of reviews and blogs are continuously increasing and constitute public opinions regarding products, services, individuals, organizations, or events. The expression of sentences in available online text can be related to sentiments and emotions [<xref ref-type="bibr" rid="ref-1">1</xref>], and generally referred to as opinions, recommendations, instructions, advice, and tips for others regarding any entity. Such opinions can be collectively termed as suggestions [<xref ref-type="bibr" rid="ref-2">2</xref>].</p>
<p>Studies have described suggestion mining as sentence classification, which is based on predicting opinionated text into the binary forms of suggestions and non-suggestions [<xref ref-type="bibr" rid="ref-3">3</xref>&#x2013;<xref ref-type="bibr" rid="ref-5">5</xref>]. The literature has generally defined suggestion mining as the &#x201C;extraction of suggestions from the opinionated text, where suggestions keyword denotes the recommendation, advice, and tips&#x201D; [<xref ref-type="bibr" rid="ref-3">3</xref>]. These suggestions are valuable to customers and business organizations [<xref ref-type="bibr" rid="ref-6">6</xref>] if extracted comprehensively from opinionated text [<xref ref-type="bibr" rid="ref-7">7</xref>]. Suggestions must be extracted using computers because online reviews, blogs, and forums that contain suggestions are continuously increasing, resulting in large datasets [<xref ref-type="bibr" rid="ref-6">6</xref>]. The high data volume makes it challenging to extract suggestions [<xref ref-type="bibr" rid="ref-8">8</xref>]; therefore, automatic suggestion mining has emerged as a new research area [<xref ref-type="bibr" rid="ref-1">1</xref>].</p>
<p>Suggestion mining is an approach that largely emphasizes analyzing and identifying sentences to explore explicitly the suggestions they contain [<xref ref-type="bibr" rid="ref-2">2</xref>]. Identifying opinions about products and services that are discussed on social media is useful to organizations&#x2019; management and to consumers. These opinions offer suggestions that assist management in deciding on improvements to products and services [<xref ref-type="bibr" rid="ref-6">6</xref>]. In addition, consumers can benefit from these suggestions by using them to decide whether to buy a particular product or service. Such increased opinionated text has constituted the major dataset in the majority of recent research [<xref ref-type="bibr" rid="ref-9">9</xref>&#x2013;<xref ref-type="bibr" rid="ref-11">11</xref>]. Some studies have focused on product reviews [<xref ref-type="bibr" rid="ref-4">4</xref>,<xref ref-type="bibr" rid="ref-5">5</xref>,<xref ref-type="bibr" rid="ref-12">12</xref>] related to tourism (e.g., hotel service) [<xref ref-type="bibr" rid="ref-10">10</xref>,<xref ref-type="bibr" rid="ref-11">11</xref>] and on social media data (e.g., Twitter) [<xref ref-type="bibr" rid="ref-13">13</xref>].</p>
<p>Moreover, several challenges in suggestion mining approaches relate to analyzing the sentiments of the sentence, identifying the relationship between suggestions, and selecting annotators for supervised and unsupervised learning [<xref ref-type="bibr" rid="ref-14">14</xref>]. Suggestion mining is a recent research area, and thus, studies on extracting suggestions involving different classifiers and algorithms are relatively limited [<xref ref-type="bibr" rid="ref-15">15</xref>]. Studies related to support vector machines (SVMs) [<xref ref-type="bibr" rid="ref-16">16</xref>], long short-term memory (LSTM) [<xref ref-type="bibr" rid="ref-8">8</xref>], hidden Markov [<xref ref-type="bibr" rid="ref-17">17</xref>], Random Forest [<xref ref-type="bibr" rid="ref-18">18</xref>,<xref ref-type="bibr" rid="ref-19">19</xref>], Na&#x00EF;ve Bayes [<xref ref-type="bibr" rid="ref-20">20</xref>,<xref ref-type="bibr" rid="ref-21">21</xref>], and other areas [<xref ref-type="bibr" rid="ref-22">22</xref>] have also contributed to improvements in suggestion mining.</p>
<p>Thus, the present study is among the few such studies that are aimed at improving suggestion mining results by experimenting with the word-embedding approach and the XGBoost classifier. This study aims to capture context and similarity with other words. Furthermore, this study contributes by improving the classifier performance through the XGBoost classifier, as compared with Na&#x00EF;ve Bayes and Random Forest. Moreover, variations in the proposed suggestion mining extraction process yield improved suggestion mining results. The remainder of the paper is structured as follows. Section 2 describes related work regarding suggestion mining and Section 3 explains the proposed suggestion mining extraction process. Section 4 describes the detailed experiment results and Section 5 presents a results analysis and discussion. Last, Section 6 describes the conclusion and future work.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related Works</title>
<p>Prior approaches to suggestion mining focused on linguistic rules and on supervised machine learning through manually identified features. The key supervised learning algorithms used in these studies were the hidden Markov model, the conditional random field (CRF) [<xref ref-type="bibr" rid="ref-9">9</xref>], factorization machines [<xref ref-type="bibr" rid="ref-4">4</xref>], and SVM [<xref ref-type="bibr" rid="ref-2">2</xref>]. Further, these studies used training datasets that had fewer than 8,000 sentences and an exceedingly imbalanced distribution of classes. Importantly, only a few of these datasets are publicly available. All these datasets contain the suggestion class in the minority, and the ratio ranges from 8% to 27% of the entire dataset&#x2019;s sentences.</p>
<p>&#x201C;Suggestion&#x201D; can be defined in two ways. First, a generic definition [<xref ref-type="bibr" rid="ref-11">11</xref>,<xref ref-type="bibr" rid="ref-12">12</xref>] is that &#x201C;a sentence made by a person, usually as a suggestion or an action guide and/or conduct relayed in a particular context.&#x201D; Second, an application-specific definition defines suggestion as &#x201C;sentences where the commenter wishes for a change in an existing product or service&#x201D; [<xref ref-type="bibr" rid="ref-15">15</xref>]. Although the generic definition is applied to all domains, the existing research has recorded evaluating suggestion mining on a solitary domain.</p>
<p>Various studies [<xref ref-type="bibr" rid="ref-23">23</xref>,<xref ref-type="bibr" rid="ref-24">24</xref>] have performed mining on weblogs and forums of what they denote as sentences that reveal advice. This mining is performed using supervised learning methods. Recently, neural networks and learning algorithms have been utilized for suggestion mining [<xref ref-type="bibr" rid="ref-13">13</xref>]. Tao et al. [<xref ref-type="bibr" rid="ref-13">13</xref>] used pretrained word insertion with a dataset that was related to gold-standard training. In addition, diverse classifiers were compared. These classifiers included manually expressed guidelines and SVM (with a diversity of manually reported features related to lexical, syntactic, and sentiment analysis), convolutional neural networks, and LSTM networks.</p>
<p>Similarly, the authors in the study conducted in 2021 [<xref ref-type="bibr" rid="ref-4">4</xref>] engaged supervised learning and achieved suggestion detection on &#x201C;tweets.&#x201D; These suggestions regarded the phone that was launched by Microsoft. Zucco et al. [<xref ref-type="bibr" rid="ref-14">14</xref>] did not define the suggestions in their work; rather, they reported the objectives of the collection of suggestions, which was to progress and improve the quality and functionality of the product, organization, and service. The authors in [<xref ref-type="bibr" rid="ref-25">25</xref>] delivered an algorithm&#x2014;&#x201C;GloVE&#x201D;&#x2014;to train word embeddings that, compared with additional algorithms, perform highly on several benchmark tasks and datasets. The GloVE algorithm has outperformed various other algorithms, such as skip-grams and the continuous bag of words, which are variations of the &#x201C;word2vec&#x201D; model. Therefore, it is a strong base to use pretrained GloVE embeddings [<xref ref-type="bibr" rid="ref-25">25</xref>] to evaluate the performance of the embedding theory using the present study&#x2019;s dataset.</p>
<p>Training task-based embeddings is verified as beneficial for tasks regarding short-text classification (e.g., sentiment analysis). In this regard, the authors in [<xref ref-type="bibr" rid="ref-26">26</xref>] reported the trained sentiment-related word embedding by using supervised learning on a large dataset regarding Twitter sentiments, which were characterized through the emotions displayed in the tweets. Recently, studies have focused on suggestion mining in regard to the problems involved in classifying the sentences and experimented with various statistical classifiers and their features [<xref ref-type="bibr" rid="ref-27">27</xref>]. However, improvement in classifiers in terms of their accuracy and datasets is a serious concern to achieve the desired complete results [<xref ref-type="bibr" rid="ref-28">28</xref>]. Thus, the existing algorithms need to be significantly improved to address this gap because suggestion mining is an emerging and novel approach to classifying text. Although existing studies have specified the feature extraction classifiers and their accuracies for suggestion mining, it is concluded that none have used the XGBoost classifier to identify suggestions from customer reviews.</p>
<p>Further, earlier studies have also not compared XGBoost with other classifiers to determine the better approach for identifying the suggestions from reviews. Therefore, this study defines suggestion classification and presents a better suggestion mining extraction process to identify suggestions from social media data regarding online customer reviews of the hotel industry. The next section presents the proposed suggestion mining extraction process of the opinionated text of online customer reviews.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Methodology</title>
<p>This study presents a novel approach to the suggestion mining extraction process, which aims to extract useful features to train the classifier for improved results. <xref ref-type="fig" rid="fig-1">Fig. 1</xref> illustrates the suggestion mining steps used in this study and Algorithm 1 demonstrates the steps in training a model to predict a review as either a suggestion or non-suggestion.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>Suggestion mining extraction steps</title>
</caption><graphic mimetype="image" mime-subtype="png" xlink:href="fig-1.png"/>
</fig>
<sec id="s3_1">
<label>3.1</label>
<title>Preprocessing</title>
<p>First, this study preprocesses the text, which involves two sub-steps&#x2014;data cleansing and data processing&#x2014;to clean the data for further processing. Algorithm 2 describes the details of the preprocessing component.</p>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Data Cleansing</title>
<p>The primary reason for using the data cleansing approach is to clean unusable data [<xref ref-type="bibr" rid="ref-23">23</xref>]. Generally, online reviews consist of rich information, such as usernames, blank spaces, special characters, and URLs. Removing such unnecessary information can assist in extracting suggestions from the cleaned opinionated text [<xref ref-type="bibr" rid="ref-1">1</xref>]. Therefore, this study performs data cleansing by removing unusable text in suggestion mining. The following information is removed from the dataset, using regular expressions, to ensure a clean dataset ready for further processing.</p>
<p>
<statement>
<label>Algorithm 1: </label>
<title>Training a model</title>
<p><bold><italic>Input:</italic></bold> <inline-formula id="ieqn-1"><!--<alternatives><inline-graphic xlink:href="ieqn-1.png"/>--><!--<tex-math id="tex-ieqn-1"><![CDATA[$\ttextit{Review dataset (reviews, labels) where label}= 1$]]></tex-math>--><mml:math id="mml-ieqn-1"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">Review&#x00A0;dataset&#x00A0;(reviews,&#x00A0;labels)&#x00A0;where&#x00A0;label</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math><!--</alternatives>--></inline-formula> <inline-formula id="ieqn-2"><!--<alternatives><inline-graphic xlink:href="ieqn-2.png"/>--><!--<tex-math id="tex-ieqn-2"><![CDATA[$\ttextit{for suggestion and label}= 0$]]></tex-math>--><mml:math id="mml-ieqn-2"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">for&#x00A0;suggestion&#x00A0;and&#x00A0;label</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:math><!--</alternatives>--></inline-formula> <italic>for non-suggestion</italic></p><p><bold><italic>Output:</italic></bold> <italic>trained model that predicts a review as either a suggestion or non-suggestion</italic></p>
<p><bold><italic>for each</italic></bold> <italic>review</italic> <bold><italic>in</italic></bold> <italic>dataset</italic> <bold><italic>do</italic></bold></p>
<p><inline-formula id="ieqn-3"><!--<alternatives><inline-graphic xlink:href="ieqn-3.png"/>--><!--<tex-math id="tex-ieqn-3"><![CDATA[$\ttextit{tokenizedReviews[ ]} \leftarrow \ttextit{preprocessing(review)}$]]></tex-math>--><mml:math id="mml-ieqn-3"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">tokenizedReviews[&#x00A0;]</mml:mtext></mml:mstyle><mml:mo>&#x2190;</mml:mo><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">preprocessing(review)</mml:mtext></mml:mstyle></mml:math><!--</alternatives>--></inline-formula></p>
<p><bold><italic>end for</italic></bold></p>
<p><bold><italic>for each</italic></bold> <italic>tokenizedReview</italic> <bold><italic>in</italic></bold> <italic>dataset</italic> <bold><italic>do</italic></bold></p>
<p><italic>//word features in form of unigram, bigram, trigram, or all</italic></p>
<p><inline-formula id="ieqn-4"><!--<alternatives><inline-graphic xlink:href="ieqn-4.png"/>--><!--<tex-math id="tex-ieqn-4"><![CDATA[$\ttextit{wordFeatures[ ]} \leftarrow \ttextit{featureExtraction(tokenizedReview)}$]]></tex-math>--><mml:math id="mml-ieqn-4"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">wordFeatures[&#x00A0;]</mml:mtext></mml:mstyle><mml:mo>&#x2190;</mml:mo><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">featureExtraction(tokenizedReview)</mml:mtext></mml:mstyle></mml:math><!--</alternatives>--></inline-formula></p>
<p><bold><italic>end for</italic></bold></p>
<p><bold><italic>while</italic></bold> <italic>accuracy is not improved</italic> <bold><italic>do</italic></bold></p>
<p><italic>trainClassifier</italic>(<italic>wordFeatures</italic>)</p>
<p><bold><italic>end while</italic></bold></p>
</statement></p>
<statement>
<label>Algorithm 2: </label>
<title>Data preprocessing</title>
<p><bold><italic>Input:</italic></bold> <italic>Review dataset</italic></p>
<p><bold><italic>Output:</italic></bold> <italic>Tokenized arrays of words</italic></p>
<p><bold><italic>for each</italic></bold> <italic>review in dataset</italic> <bold><italic>do</italic></bold></p>
<p><italic>dataCleansing (</italic><bold><italic>review</italic></bold><italic>)</italic></p>
<p><italic>split review into array of words</italic></p>
<p><bold><italic>for each</italic></bold> <italic>word in review</italic> <bold><italic>do</italic></bold></p>
<p><italic>lowercase (</italic><bold><italic>word</italic></bold><italic>)</italic></p>
<p><italic>stemming (</italic><bold><italic>word</italic></bold><italic>)</italic></p>
<p><bold><italic>end for</italic></bold></p>
<p><bold><italic>end for</italic></bold></p>
</statement>
<list list-type="bullet">
<list-item><p>usernames in the sentences (e.g., @xyz)</p></list-item>
<list-item><p>empty fields</p></list-item>
<list-item><p>unnecessary numbers</p></list-item>
<list-item><p>special characters used by customers and users in their reviews</p></list-item>
<list-item><p>URLs</p></list-item>
</list>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>Data Processing</title>
<p>After data cleansing, the following data processing steps are undertaken. First, the tokenization process is applied, which helps decompose the whole sentence stream into portions of words or meaningful elements [<xref ref-type="bibr" rid="ref-23">23</xref>]. These elements are referred to as tokens; for example, words such as &#x201C;suggest,&#x201D; &#x201C;recommend,&#x201D; and &#x201C;please&#x201D; are usually used to express an opinion. Meaningful features lead to classification success. In this study, all words in the review were tokenized using a pretrained version of the Punkt Sentence Tokenizer, from the Natural Language Toolkit (NLTK) library. <xref ref-type="table" rid="table-1">Tab. 1</xref> presents some of the tokens used in this study, which were useful for further data processing. Second, each token is transformed into lower case, to eliminate the repetition of words and terms and to place the entire text in a unique structure. Third, the stemming process is used to unify the words across the entire document and to highlight the uniqueness of words through their stems; for example, &#x201C;computational,&#x201D; &#x201C;compute,&#x201D; and &#x201C;computing&#x201D; stem from &#x201C;compute.&#x201D; During the feature extraction phase, this process helps to avoid duplications. This study used the Porter stemming algorithm to create stems for tokens that were included in the Python NLTK library.</p>
<table-wrap id="table-1">
<label>Table 1</label>
<caption>
<title>Sample of preprocessed tokens from two datasets (hotel reviews [HR], Microsoft windows app studio reviews [MSWASR])</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>ID</th>
<th>Review</th>
<th>Dataset</th>
<th>Class</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>[without, doubt, on, of, the, favorite, hotel<inline-formula id="ieqn-5"><!--<alternatives><inline-graphic xlink:href="ieqn-5.png"/>--><!--<tex-math id="tex-ieqn-5"><![CDATA[$\ldots$]]></tex-math>--><mml:math id="mml-ieqn-5"><mml:mo>&#x2026;</mml:mo></mml:math><!--</alternatives>--></inline-formula>]</td>
<td>HR</td>
<td>0</td>
</tr>
<tr>
<td>1</td>
<td>[mistakenly selected, ever, currently<inline-formula id="ieqn-6"><!--<alternatives><inline-graphic xlink:href="ieqn-6.png"/>--><!--<tex-math id="tex-ieqn-6"><![CDATA[$\ldots$]]></tex-math>--><mml:math id="mml-ieqn-6"><mml:mo>&#x2026;</mml:mo></mml:math><!--</alternatives>--></inline-formula>]</td>
<td>MSWASR</td>
<td>1</td>
</tr>
<tr>
<td>2</td>
<td>[a, great, place, to, stay, staff, were, friendly<inline-formula id="ieqn-7"><!--<alternatives><inline-graphic xlink:href="ieqn-7.png"/>--><!--<tex-math id="tex-ieqn-7"><![CDATA[$\ldots$]]></tex-math>--><mml:math id="mml-ieqn-7"><mml:mo>&#x2026;</mml:mo></mml:math><!--</alternatives>--></inline-formula>]</td>
<td>HR</td>
<td>0</td>
</tr>
<tr>
<td>3</td>
<td>[we, only, stay, here, on, night, but, the, ho<inline-formula id="ieqn-8"><!--<alternatives><inline-graphic xlink:href="ieqn-8.png"/>--><!--<tex-math id="tex-ieqn-8"><![CDATA[$\ldots$]]></tex-math>--><mml:math id="mml-ieqn-8"><mml:mo>&#x2026;</mml:mo></mml:math><!--</alternatives>--></inline-formula>]</td>
<td>HR</td>
<td>0</td>
</tr>
<tr>
<td>4</td>
<td>[try other, shadow to distinguish, from content<inline-formula id="ieqn-9"><!--<alternatives><inline-graphic xlink:href="ieqn-9.png"/>--><!--<tex-math id="tex-ieqn-9"><![CDATA[$\ldots$]]></tex-math>--><mml:math id="mml-ieqn-9"><mml:mo>&#x2026;</mml:mo></mml:math><!--</alternatives>--></inline-formula>]</td>
<td>MSWASR</td>
<td>1</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Feature Extraction</title>
<p>Almost all supervised machine learning algorithms can classify data in the form of integer or floating-point vectors [<xref ref-type="bibr" rid="ref-29">29</xref>]. Feature extraction is the process of converting input data into the vector form for use in training classifiers. Machine learning classifiers do not work on data because they attempt to understand and extract data patterns for classification [<xref ref-type="bibr" rid="ref-27">27</xref>,<xref ref-type="bibr" rid="ref-30">30</xref>]. Feature extraction and selection play a primary role in classification accuracy. Using irrelevant features limits the classifiers&#x2019; performance. The proposed suggestion mining extraction process experimented with four different features.</p>
<p>Reviews are converted into vectors containing Boolean values (i.e., 0 or 1) that correspond to unigrams, bigrams, trigrams, and the uni/bi/trigram combination. The translated review is given to classifiers to extract suggestions and non-suggestions. <xref ref-type="table" rid="table-2">Tab. 2</xref> depicts the vector size for each review using these feature extraction techniques. Algorithm 3 describes the review vectorization against unigram features. In the unigram feature extraction process, all words are extracted from the preprocessed dataset and a bag of unique words is created. Next, a vector is created for each review by assigning 1 if the word exists in the review, and 0 otherwise. It is common for words such as &#x201C;suggest,&#x201D; &#x201C;recommend,&#x201D; and &#x201C;please&#x201D; to occur in suggestive text.</p>
<table-wrap id="table-2">
<label>Table 2</label>
<caption>
<title>Feature extraction techniques and size</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Feature techniques</th>
<th>HR (size)</th>
<th>MSWASR (size)</th>
</tr>
</thead>
<tbody>
<tr>
<td>Unigram</td>
<td>2,266</td>
<td>2,782</td>
</tr>
<tr>
<td>Bigram</td>
<td>4,146</td>
<td>4,144</td>
</tr>
<tr>
<td>Trigram</td>
<td>10,015</td>
<td>728</td>
</tr>
<tr>
<td>Uni/bi/trigram combination</td>
<td>7,658</td>
<td>7,500</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Algorithm 4 describes the bigrams feature model. In the bigram feature extraction process, all pairs of words are extracted from the dataset and a bag of bigrams is created. For each review, (1, 0) vectors are created, depending on whether the bigram exists. Bigram features are used to cater to suggestive phrases, such as &#x201C;would like,&#x201D; &#x201C;would love,&#x201D; and &#x201C;instead of.&#x201D; Similarly, trigrams phrase examples are &#x201C;should come with&#x201D; and &#x201C;would be nice.&#x201D; Last, a set of unigrams, bigrams, and trigrams are combined and the vector is created. The more meaningful and relevant the input features, the better the classifier&#x2019;s learning and prediction accuracy.</p>
<p>
<statement>
<label>Algorithm 3:</label>
<title>Unigram modelling algorithm</title>
<p><bold><italic>Input:</italic></bold> <italic>Preprocessed reviews, bag of unigrams</italic></p><p><bold><italic>Output:</italic></bold> <italic>Unigram features vector</italic></p>
<p><bold><italic>for each</italic></bold> <italic>review</italic> <bold><italic>in</italic></bold> <italic>preprocessed reviews</italic> <bold><italic>do</italic></bold></p>
<p><bold><italic>for each</italic></bold> <italic>word</italic> <bold><italic>in</italic></bold> <italic>bag of unigrams</italic> <bold><italic>do</italic></bold></p>
<p><bold><italic>if</italic></bold> <italic>word exists</italic> <bold><italic>in</italic></bold> <italic>review</italic> <bold><italic>then</italic></bold></p>
<p><inline-formula id="ieqn-10"><!--<alternatives><inline-graphic xlink:href="ieqn-10.png"/>--><!--<tex-math id="tex-ieqn-10"><![CDATA[$\ttextit{vector[review, word]}= 1$]]></tex-math>--><mml:math id="mml-ieqn-10"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">vector[review,&#x00A0;word]</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math><!--</alternatives>--></inline-formula></p>
<p><bold><italic>else</italic></bold></p>
<p><inline-formula id="ieqn-11"><!--<alternatives><inline-graphic xlink:href="ieqn-11.png"/>--><!--<tex-math id="tex-ieqn-11"><![CDATA[$\ttextit{vector[review, word]}= 0$]]></tex-math>--><mml:math id="mml-ieqn-11"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">vector[review,&#x00A0;word]</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:math><!--</alternatives>--></inline-formula></p>
<p><bold><italic>end if</italic></bold></p>
<p><bold><italic>end for</italic></bold></p>
<p><bold><italic>end for</italic></bold></p>
</statement></p>
<statement>
<label>Algorithm 4:</label> 
<title>Bigram modelling algorithm</title>
<p><bold><italic>Input:</italic></bold> <italic>Preprocessed reviews, bag of bigrams</italic></p><p><bold><italic>Output:</italic></bold> <italic>Bigram features vector</italic></p>
<p><bold><italic>for each</italic></bold> <italic>review</italic> <bold><italic>in</italic></bold> <italic>preprocessed reviews</italic> <bold><italic>do</italic></bold></p>
<p><bold><italic>for each</italic></bold> <italic>bigram</italic> <bold><italic>in</italic></bold> <italic>bag of bigrams</italic> <bold><italic>do</italic></bold></p>
<p><bold><italic>if</italic></bold> <italic>bigram exists</italic> <bold><italic>in</italic></bold> <italic>review</italic> <bold><italic>then</italic></bold></p>
<p><inline-formula id="ieqn-12"><!--<alternatives><inline-graphic xlink:href="ieqn-12.png"/>--><!--<tex-math id="tex-ieqn-12"><![CDATA[$\textit{vector[review, bigram]}= 1$]]></tex-math>--><mml:math id="mml-ieqn-12"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">vector[review,&#x00A0;bigram]</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math><!--</alternatives>--></inline-formula></p>
<p><bold><italic>else</italic></bold></p>
<p><inline-formula id="ieqn-13"><!--<alternatives><inline-graphic xlink:href="ieqn-13.png"/>--><!--<tex-math id="tex-ieqn-13"><![CDATA[$\textit{vector[review, bigram]}= 0$]]></tex-math>--><mml:math id="mml-ieqn-13"><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">vector[review,&#x00A0;bigram]</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:math><!--</alternatives>--></inline-formula></p>
<p><bold><italic>end if</italic></bold></p>
<p><bold><italic>end for</italic></bold></p>
<p><bold><italic>end for</italic></bold></p>
</statement>
<p><xref ref-type="table" rid="table-3">Tab. 3</xref> shows the example association of words using the unigram word feature. The &#x201C;class label&#x201D; column shows whether the review is a suggestion (i.e., 1) or non-suggestion (i.e., 0). Further, in this table, 1 refers to the found association whereas 0 denotes that there is no association with the word in the given sentence.</p>
<table-wrap id="table-3">
<label>Table 3</label>
<caption>
<title>Example association of words using the unigram word feature</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Review ID</th>
<th>Class label</th>
<th>Allow</th>
<th>Add</th>
<th>Suggest</th>
<th>Recommend</th>
<th>Please</th>
<th>Support</th>
<th>Visit</th>
<th>New</th>
<th>Help</th>
</tr>
</thead>
<tbody>
<tr>
<td colspan="11">HR</td>
</tr>
<tr>
<td>1</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>1</td>
<td>1</td>
<td>0</td>
</tr>
<tr>
<td>2</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
</tr>
<tr>
<td>3</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
</tr>
<tr>
<td colspan="11">MSWASR</td>
</tr>
<tr>
<td>1</td>
<td>1</td>
<td>1</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>1</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>1</td>
</tr>
<tr>
<td>2</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
</tr>
<tr>
<td>3</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>1</td>
<td>1</td>
<td>0</td>
<td>1</td>
<td>1</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Classification</title>
<p>After the feature extraction process, the reviews are ready for classification. The proposed suggestion mining system used XGBoost classifier and compared the results with the Na&#x00EF;ve Bayes and Random Forest algorithms. The XGBoost classifier is a relatively new machine learning algorithm that is based on decision trees and boosting. Nevertheless, it was used in this study because it is highly scalable and provides improved statistics and better results.</p>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Experiment</title>
<p>This study used two datasets of the hotel industry as well as the MSWASR dataset in relation to customer reviews (see <xref ref-type="table" rid="table-4">Tab. 4</xref>). These reviews contain opinionated text with sentences that explicitly express suggestions and non-suggestions. To perform the experiments, a random data subset was created to foresee the overall performance of the algorithms.</p>
<table-wrap id="table-4">
<label>Table 4</label>
<caption>
<title>Datasets used in the experiment</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th>Dataset</th>
<th>Data source</th>
<th>N</th>
<th>S</th>
<th>Purpose</th>
</tr>
</thead>
<tbody>
<tr>
<td>Hotel industry</td>
<td>Datafiniti</td>
<td>34,000</td>
<td>10,500</td>
<td>Extract suggestion</td>
</tr>
<tr>
<td/>
<td>Code source competition</td>
<td>8,500</td>
<td>2,200</td>
<td>Extract suggestion</td>
</tr>
<tr>
<td>MSWASR</td>
<td>Github</td>
<td>9,000</td>
<td>2,700</td>
<td>Extract suggestion</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="table" rid="table-4">Tab. 4</xref> consists of five columns. First, &#x201C;dataset&#x201D; refers to the nature of the dataset. Second, &#x201C;data source&#x201D; describes the source of data in which the dataset was retrieved. Third, &#x201C;N&#x201D; refers to the total number of data collection instances from the data source. Fourth, &#x201C;S&#x201D; denotes the subset volume of the dataset that was randomly selected for the experiment. Last, &#x201C;purpose&#x201D; describes the tasks that need to be executed in this experiment.</p>
<p>This experiment used 42,000 online reviews from the hotel industry datasets and 9,000 reviews from the MSWASR dataset. All datasets comprised opinionated text (e.g., opinion, advice, suggestion, or tips), from which the experiment aimed to extract suggestions. In this experiment, the hotel industry Datafiniti dataset contained 34,000 data instances for training purposes, in which a subset of 10,500 instances was used to test the dataset. Similarly, the hotel industry Code Source Competition dataset contained 8,500 data instances for training purposes, in which a subset of 2,200 instances was used for evaluation. Further, the MSWASR Github dataset contained 9,000 data instances for training purposes, in which a subset of 2,700 instances was used to test the dataset.</p>
<p>As previously specified, the XGBoost classifier was used to classify suggestions. Initially, data cleansing was performed, which was followed by the tokenization process. The word2vec approach was used to generate word vectors, which continuously improve each time the classifier is executed. Therefore, training the classifier with a training set is important because it can assist in building vocabulary for the test set. This study used an online hotel review dataset to train the classifier. Next, the hotel industry testing datasets and MSWASR&#x2019;s total dataset were used to determine the performance of three classifiers&#x2014;XGBoost, Na&#x00EF;ve Bayes, and Random Forest. To obtain the best performance, the semantic inclusion approach was utilized through a bag of words technique. Therefore, unique words were listed through a bag of words, which generated vectors.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Results</title>
<p>The performance measurement results were identified based on precision, recall, F1 score, and accuracy. Precision is generally used to measure the proportion of positive identifications that are actually correct; for example, a precision score of 0.80 indicates that its predictions of suggestive reviews are correct 80% of the time. Next, recall generally refers to the completeness of the classifier used in a given dataset. It describes the proportion of actual positives, which means how many suggestions are identified correctly. Further, the F1 score refers to the harmonic mean of precision and recall; its best value is 1 and its worst value is 0. Last, accuracy demonstrates the ratio of correctly predicted observations and explains the classifiers&#x2019; ability to predict accurately. Moreover, the average accuracy is calculated to cross-validate the results.</p>
<p>Further, positive and negative scores are categorized into true positive, false positive, true negative, and false negative. True positive means that the output class of review is a found suggestion and that it is correctly classed as a suggestion. Conversely, true negative describes that the output class of review is a non-suggestion and it is correctly classed a non-suggestion. Next, false positive describes that the output class of review is a non-suggestion but it is falsely classed as a suggestion. Conversely, false negative describes that the output class of review is a suggestion but it is falsely classed as a non-suggestion. In addition, the results and analysis are reported based on the unigram, bigram, and trigram models. Moreover, comparative statistics are also reported for all three models.</p>
<p><xref ref-type="table" rid="table-5">Tab. 5</xref> reports statistics regarding the performance measurement of feature identification using the unigram model. <xref ref-type="table" rid="table-5">Tab. 5</xref> comprises two main columns, &#x201C;hotel industry dataset&#x201D; and &#x201C;MSWASR dataset,&#x201D; which are further split into three sub-columns of classifiers&#x2014;Na&#x00EF;ve Bayes, Random Forest, and XGBoost. Suggestions are reported against each classifier in regard to F1, precision, recall, and accuracy.</p>
<table-wrap id="table-5">
<label>Table 5</label>
<caption>
<title>Performance measurement of features using the unigram model</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th></th>
<th colspan="3">Hotel industry dataset</th>
<th colspan="3">MSWASR dataset</th>
</tr>
<tr>
<th></th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
</tr>
</thead>
<tbody>
<tr>
<td>F1</td>
<td>0.49</td>
<td>0.45</td>
<td>0.53</td>
<td>0.79</td>
<td>0.80</td>
<td>0.89</td>
</tr>
<tr>
<td>Precision</td>
<td>0.66</td>
<td>0.60</td>
<td>0.78</td>
<td>0.71</td>
<td>0.80</td>
<td>0.80</td>
</tr>
<tr>
<td>Recall</td>
<td>0.44</td>
<td>0.30</td>
<td>0.43</td>
<td>0.71</td>
<td>0.80</td>
<td>0.82</td>
</tr>
<tr>
<td>Accuracy</td>
<td>0.70</td>
<td>0.64</td>
<td>0.84</td>
<td>0.81</td>
<td>0.82</td>
<td>0.87</td>
</tr>
<tr>
<td>Average accuracy</td>
<td>0.69</td>
<td>0.62</td>
<td>0.82</td>
<td>0.78</td>
<td>0.80</td>
<td>0.81</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The results for the unigram model reveal the lowest scores for Na&#x00EF;ve Bayes for F1, precision, recall, and accuracy. The highest scores are observed for Random Forest and XGBoost classifiers. However, the experimental results indicate that XGBoost scored higher than Random Forest.</p>
<p><xref ref-type="table" rid="table-6">Tab. 6</xref> reports statistics regarding the performance measurement of feature identification using the bigram model. <xref ref-type="table" rid="table-6">Tab. 6</xref> comprises two main columns that represent both datasets, which are further split into sub-columns that represent the three classifiers. Again, suggestions are reported against each classifier in regard to F1, precision, recall, and accuracy.</p>
<p>The results indicate that all scores are higher for the XGBoost classifier. Random Forest outperformed Na&#x00EF;ve Bayes in all categories except for precision.</p>
<table-wrap id="table-6">
<label>Table 6</label>
<caption>
<title>Performance measurement of features using the bigram model</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th></th>
<th colspan="3">Hotel industry dataset</th>
<th colspan="3">MSWASR dataset</th>
</tr>
<tr>
<th></th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
</tr>
</thead>
<tbody>
<tr>
<td>F1</td>
<td>0.34</td>
<td>0.43</td>
<td>0.58</td>
<td>0.78</td>
<td>0.79</td>
<td>0.84</td>
</tr>
<tr>
<td>Precision</td>
<td>0.54</td>
<td>0.46</td>
<td>0.87</td>
<td>0.81</td>
<td>0.79</td>
<td>0.81</td>
</tr>
<tr>
<td>Recall</td>
<td>0.35</td>
<td>0.45</td>
<td>0.66</td>
<td>0.80</td>
<td>0.81</td>
<td>0.83</td>
</tr>
<tr>
<td>Accuracy</td>
<td>0.65</td>
<td>0.68</td>
<td>0.81</td>
<td>0.80</td>
<td>0.81</td>
<td>0.87</td>
</tr>
<tr>
<td>Average accuracy</td>
<td>0.63</td>
<td>0.65</td>
<td>0.80</td>
<td>0.79</td>
<td>0.80</td>
<td>0.86</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="table" rid="table-7">Tab. 7</xref> reports statistics regarding the performance measurement of feature identification using the trigram model. <xref ref-type="table" rid="table-7">Tab. 7</xref> comprises two main columns that represent both datasets, which are further split into sub-columns that represent the three classifiers. Suggestions are once again reported against each classifier in regard to F1, precision, recall, and accuracy.</p>
<p>The results demonstrate that Na&#x00EF;ve Bayes has the lowest scores for F1, precision, recall, and accuracy. The highest scores are obtained by using the Random Forest and XGBoost classifiers. However, the results indicate that XGBoost scored higher than Random Forest.</p>
<table-wrap id="table-7">
<label>Table 7</label>
<caption>
<title>Performance measurement of features using the trigram model</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th></th>
<th colspan="3">Hotel industry dataset</th>
<th colspan="3">MSWASR dataset</th>
</tr>
<tr>
<th></th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
</tr>
</thead>
<tbody>
<tr>
<td>F1</td>
<td>0.30</td>
<td>0.36</td>
<td>0.55</td>
<td>0.71</td>
<td>0.76</td>
<td>0.78</td>
</tr>
<tr>
<td>Precision</td>
<td>0.36</td>
<td>0.71</td>
<td>0.90</td>
<td>0.75</td>
<td>0.76</td>
<td>0.80</td>
</tr>
<tr>
<td>Recall</td>
<td>0.14</td>
<td>0.21</td>
<td>0.28</td>
<td>0.77</td>
<td>0.78</td>
<td>0.81</td>
</tr>
<tr>
<td>Accuracy</td>
<td>0.67</td>
<td>0.68</td>
<td>0.81</td>
<td>0.77</td>
<td>0.78</td>
<td>0.79</td>
</tr>
<tr>
<td>Average accuracy</td>
<td>0.65</td>
<td>0.65</td>
<td>0.80</td>
<td>0.77</td>
<td>0.78</td>
<td>0.83</td>
</tr>
</tbody>
</table>
</table-wrap>
 
<table-wrap id="table-8">
<label>Table 8</label>
<caption>
<title>Performance measurement of features using the uni/bi/trigram combination model</title>
</caption>
<!---->
<table>
<colgroup>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
<col/>
</colgroup>
<thead>
<tr>
<th></th>
<th colspan="3">Hotel industry dataset</th>
<th colspan="3">MSWASR dataset</th>
</tr>
<tr>
<th></th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
<th>Na&#x00EF;ve Bayes</th>
<th>Random forest</th>
<th>XGBoost</th>
</tr>
</thead>
<tbody>
<tr>
<td>F1</td>
<td>0.49</td>
<td>0.45</td>
<td>0.53</td>
<td>0.81</td>
<td>0.76</td>
<td>0.83</td>
</tr>
<tr>
<td>Precision</td>
<td>0.66</td>
<td>0.60</td>
<td>0.78</td>
<td>0.78</td>
<td>0.76</td>
<td>0.82</td>
</tr>
<tr>
<td>Recall</td>
<td>0.44</td>
<td>0.30</td>
<td>0.43</td>
<td>0.78</td>
<td>0.73</td>
<td>0.81</td>
</tr>
<tr>
<td>Accuracy</td>
<td>0.70</td>
<td>0.64</td>
<td>0.84</td>
<td>0.79</td>
<td>0.77</td>
<td>0.82</td>
</tr>
<tr>
<td>Average accuracy</td>
<td>0.69</td>
<td>0.62</td>
<td>0.82</td>
<td>0.79</td>
<td>0.73</td>
<td>0.87</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In addition, a combined performance evaluation is presented. <xref ref-type="table" rid="table-8">Tab. 8</xref> reports the comparative statistics of the unigram, bigram, and trigram models. <xref ref-type="table" rid="table-8">Tab. 8</xref> comprises two main columns that represent both datasets, which are further split into sub-columns that represent the three classifiers. Suggestions are reported against each classifier in regard to F1, precision, recall, and accuracy.</p>
<p>When the unigram, bigram, and trigram models are executed together, the results varied regarding Na&#x00EF;ve Bayes and Random Forest. Specifically, Random Forest had the lowest scores for F1, precision, recall, and accuracy. Interestingly, Na&#x00EF;ve Bayes performed better in this scenario than in the previous scenarios, in which the models were not executed simultaneously. However, XGBoost once again displayed the highest results.</p>
</sec>
<sec id="s5">
<label>5</label>
<title>Discussion</title>
<p>Based on the experiments conducted in this study, it can be observed that the XGBoost classifier has outperformed the other two classifiers. The findings of the experiments are shown in <xref ref-type="fig" rid="fig-2">Figs. 2</xref>&#x2013;<xref ref-type="fig" rid="fig-5">5</xref>, in which the results for the F1, precision, recall, and accuracy of the three classifiers are reported.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>(a) Unigram model scores for the hotel dataset. (b) Unigram model scores for the MSWASR dataset</title>
</caption><graphic mimetype="image" mime-subtype="png" xlink:href="fig-2.png"/>
</fig>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>(a) Bigram model scores for the hotel dataset. (b) Bigram model scores for the MSWASR dataset</title>
</caption><graphic mimetype="image" mime-subtype="png" xlink:href="fig-3.png"/>
</fig>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>(a) Trigram model scores for the hotel dataset. (b) Trigram model scores for the MSWASR dataset</title>
</caption><graphic mimetype="image" mime-subtype="png" xlink:href="fig-4.png"/>
</fig>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>(a) Uni/Bi/Trigram model scores for the hotel dataset. (b) Uni/Bi/Trigram model scores for the MSWASR dataset</title>
</caption><graphic mimetype="image" mime-subtype="png" xlink:href="fig-5.png"/>
</fig>
<p>Further, an accuracy comparison among Na&#x00EF;ve Bayes, Random Forest, and XGBoost classifiers was conducted for the hotel industry and MSWASR datasets. The detailed illustration of the accuracy comparison of the three classifiers is shown in <xref ref-type="fig" rid="fig-6">Fig. 6</xref>.</p>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>Accuracy comparison of Na&#x00EF;ve Bayes, random forest, and XGBoost classifiers for the hotel industry and MSWASR datasets</title>
</caption><graphic mimetype="image" mime-subtype="png" xlink:href="fig-6.png"/>
</fig>
<p>As demonstrated in <xref ref-type="fig" rid="fig-6">Fig. 6a</xref>, Random Forest performed better than Na&#x00EF;ve Bayes in terms of the accuracy of results; however, its results varied among the unigram, bigram, trigram, and the combination of all three models (0.64, 0.68, 0.68, and 0.64, respectively). Interestingly, the results for XGBoost accuracy were better than those for Random Forest in all models (0.84, 0.81, 0.81, and 0.84, respectively). As shown in <xref ref-type="fig" rid="fig-6">Fig. 6b</xref>, similar results were found for the MSWASR dataset, in which Random Forest outperformed Na&#x00EF;ve Bayes in terms of accuracy, but again had varied results among the unigram, bigram, trigram, and the uni/bi/trigram combination (0.82, 0.81, 0.78, and 0.77, respectively). Once again, the results for XGBoost accuracy were better than those for Random Forest in all models (0.87, 0.89, 0.87, and 0.82, respectively). Based on these findings, the XGBoost classifier performed better than the others on the given online review dataset. The Random Forest method is less stable because its accuracy values were more dispersed than those of the other classifiers.</p>
<p>Further, average accuracies were also analyzed on the given data for the three classifiers on unigram, bigram, trigram, and uni/bi/trigram modelling (see <xref ref-type="fig" rid="fig-7">Figs. 7a</xref> and <xref ref-type="fig" rid="fig-7">7b</xref>). <xref ref-type="fig" rid="fig-7">Fig. 7a</xref> demonstrates that the lowest average accuracy value (0.63) was found in the bigram of Na&#x00EF;ve Bayes and the highest value (0.82) was found in the uni/bi/trigram combination for XGBoost. Likewise, <xref ref-type="fig" rid="fig-7">Fig. 7b</xref> shows that the lowest average accuracy value (0.77) was found in the trigram of Na&#x00EF;ve Bayes and the highest value (0.87) was found in the uni/bi/trigram combination for XGBoost. Although Random Forest achieved better average accuracy results than Na&#x00EF;ve Bayes, there is no significant difference. Conversely, the average accuracy scores for XGBoost were stable and demonstrated fewer distribution scores on the given data in the unigram, bigram, trigram, and uni/bi/trigram combination modelling.</p>
<p>The authors attempted to conduct this study in such a way that the results could be generalized. This became possible by selecting datasets from two different domains (hotel and software industry), in which the various classifiers were executed. The authors have noted that the results would be more generalizable and reliable if they were statistically evaluated through performing non-parametric tests. Because of a lack of any statistical proof, the scope of the analysis is limited.</p>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>(a) Average accuracy comparison of Na&#x00EF;ve Bayes, random forest, and XGBoost classifiers for the hotel industry dataset. (b) Average accuracy comparison of Na&#x00EF;ve Bayes, random forest, and XGBoost classifiers for the MSWASR dataset</title>
</caption><graphic mimetype="image" mime-subtype="png" xlink:href="fig-7.png"/>
</fig>
</sec>
<sec id="s6">
<label>6</label>
<title>Conclusion and Future Work</title>
<p>The availability of opinionated text regarding social media data is increasing, which can assist in decision-making if extracted and analyzed carefully. The extracted suggestions, tips, and advice must be carefully analyzed to improve the business and subsequently benefit customers. Recent studies have explored suggestions from online reviews through different classifiers, such as Random Forest and Na&#x00EF;ve Bayes. The results of these studies are not mature enough and require further improvements. Therefore, this study proposed a suggestion mining process to improve the results further.</p>
<p>To this end, the authors used various techniques, such as word embedding, bag of words, and word2vec. In addition, XGBoost classifiers were used to train the dataset. The results revealed that the XGBoost classifier outperformed the other classifiers and gave an accuracy of 0.8. Moreover, the results also indicated that suggestion keywords and phrases are the predominant features for suggestion extraction. This study contributes to the methodological approach for suggestion mining through the XGBoost classifier that can be replicated in other datasets. It contributes toward the state of knowledge and practice by comparing feature extraction classifiers. In addition, it presents XGBoost as a better suggestion mining extraction process for social media data about online customer reviews of the hotel industry.</p>
<p>Nevertheless, the present study has some limitations. Although this study used more than 8,500 online hotel reviews, it is suggested that further results can be found by using a larger dataset. Second, the test dataset was manually analyzed for its suggestions class, which could impart bias. However, this limitation was overcome by involving other researchers to perform this task. Future research is needed to improve the proposed suggestion mining extraction process using the XGBoost classifier on larger review datasets. These datasets could be related to products, shopping sites, or services. Another promising research area could be extending the results of the XGBoost classifier by providing beyond domain-based training for its versatility.</p>
</sec>
</body>
<back>
<ack><p>We deeply acknowledge Taif University for supporting this study through Taif University Researchers Supporting Project Number (TURSP-2020/115), Taif University, Taif, Saudi Arabia.</p></ack>
<fn-group><fn fn-type="other"><p><bold>Funding Statement:</bold> This research is funded by Taif University, TURSP-2020/115.</p></fn>
<fn fn-type="conflict"><p><bold>Conflicts of Interest:</bold> The authors declare that they have no conflicts of interest to report regarding the present study.</p></fn></fn-group>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1"><label>[1]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M. V.</given-names> <surname>M&#x00E4;ntyl&#x00E4;</surname></string-name>, <string-name><given-names>D.</given-names> <surname>Graziotin</surname></string-name> and <string-name><given-names>M.</given-names> <surname>Kuutila</surname></string-name></person-group>, &#x201C;<article-title>The evolution of sentiment analysis&#x2014;A review of research topics, venues, and top cited papers</article-title>,&#x201D; <source>Computer Science Review</source>, vol. <volume>27</volume>, no. <issue>1</issue>, pp. <fpage>16</fpage>&#x2013;<lpage>32</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-2"><label>[2]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>P.</given-names> <surname>Buitelaar</surname></string-name>, <string-name><given-names>I. D.</given-names> <surname>Wood</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Negi</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Arcan</surname></string-name>, <string-name><given-names>J. P.</given-names> <surname>McCrae</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Mixed emotions: An open-source toolbox for multimodal emotion analysis</article-title>,&#x201D; <source>IEEE Transactions on Multimedia</source>, vol. <volume>20</volume>, no. <issue>9</issue>, pp. <fpage>2454</fpage>&#x2013;<lpage>2465</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-3"><label>[3]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>V.</given-names> <surname>Grover</surname></string-name>, <string-name><given-names>R. H.</given-names> <surname>Chiang</surname></string-name>, <string-name><given-names>T. P.</given-names> <surname>Liang</surname></string-name> and <string-name><given-names>D.</given-names> <surname>Zhang</surname></string-name></person-group>, &#x201C;<article-title>Creating strategic business value from big data analytics: A research framework</article-title>,&#x201D; <source>Journal of Management Information Systems</source>, vol. <volume>35</volume>, no. <issue>2</issue>, pp. <fpage>388</fpage>&#x2013;<lpage>423</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-4"><label>[4]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>J. A.</given-names> <surname>Garc&#x00ED;a-D&#x00ED;az</surname></string-name>, <string-name><given-names>M.</given-names> <surname>C&#x00E1;novas-Garc&#x00ED;a</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Colomo-Palacios</surname></string-name> and <string-name><given-names>R.</given-names> <surname>Valencia-Garc&#x00ED;a</surname></string-name></person-group>, &#x201C;<article-title>Detecting misogyny in Spanish tweets. An approach based on linguistics features and word embeddings</article-title>,&#x201D; <source>Future Generation Computer Systems</source>, vol. <volume>114</volume>, no. <issue>2</issue>, pp. <fpage>506</fpage>&#x2013;<lpage>518</lpage>, <year>2021</year>.</mixed-citation></ref>
<ref id="ref-5"><label>[5]</label><mixed-citation publication-type="conf-proc"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Alotaibi</surname></string-name></person-group>, &#x201C;<article-title>Automated business process modelling for analyzing sustainable system requirements engineering</article-title>,&#x201D; in <conf-name>2020 6th IEEE Int. Conf. on Information Management</conf-name>, London, UK, pp. <fpage>157</fpage>&#x2013;<lpage>161</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-6"><label>[6]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A. K.</given-names> <surname>Nassirtoussi</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Aghabozorgi</surname></string-name>, <string-name><given-names>T. Y.</given-names> <surname>Wah</surname></string-name> and <string-name><given-names>D. C. L.</given-names> <surname>Ngo</surname></string-name></person-group>, &#x201C;<article-title>Text mining for market prediction: A systematic review</article-title>,&#x201D; <source>Expert Systems with Applications</source>, vol. <volume>41</volume>, no. <issue>16</issue>, pp. <fpage>7653</fpage>&#x2013;<lpage>7670</lpage>, <year>2014</year>.</mixed-citation></ref>
<ref id="ref-7"><label>[7]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>S. K.</given-names> <surname>Lakshmanaprabu</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Shankar</surname></string-name>, <string-name><given-names>D.</given-names> <surname>Gupta</surname></string-name>, <string-name><given-names>A.</given-names> <surname>Khanna</surname></string-name>, <string-name><given-names>J. J.</given-names> <surname>Rodrigues</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Ranking analysis for online customer reviews of products using opinion mining with clustering</article-title>,&#x201D; <source>Complexity</source>, vol. <volume>2018</volume>, pp. <fpage>1</fpage>&#x2013;<lpage>9</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-8"><label>[8]</label><mixed-citation publication-type="book"><person-group person-group-type="author"><string-name><given-names>S.</given-names> <surname>Negi</surname></string-name> and <string-name><given-names>P.</given-names> <surname>Buitelaar</surname></string-name></person-group>, &#x201C;<chapter-title>Suggestion mining from opinionated text</chapter-title>,&#x201D; in <source>Sentiment Analysis in Social Networks</source>, <publisher-name>Elsevier</publisher-name>, pp. <fpage>129</fpage>&#x2013;<lpage>139</lpage>, <year>2017</year>.</mixed-citation></ref>
<ref id="ref-9"><label>[9]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>K.</given-names> <surname>Lee</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Han</surname></string-name> and <string-name><given-names>S. H.</given-names> <surname>Myaeng</surname></string-name></person-group>, &#x201C;<article-title>A discourse-aware neural network-based text model for document-level text classification</article-title>,&#x201D; <source>Journal of Information Science</source>, vol. <volume>44</volume>, no. <issue>6</issue>, pp. <fpage>715</fpage>&#x2013;<lpage>735</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-10"><label>[10]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>K.</given-names> <surname>Liang</surname></string-name> and <string-name><given-names>J.</given-names> <surname>He</surname></string-name></person-group>, &#x201C;<article-title>Analyzing credit risk among Chinese P2P-lending businesses by integrating text-related soft information</article-title>,&#x201D; <source>Electronic Commerce Research and Applications</source>, vol. <volume>40</volume>, pp. <fpage>100947</fpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-11"><label>[11]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>E.</given-names> <surname>Haris</surname></string-name> and <string-name><given-names>K. H.</given-names> <surname>Gan</surname></string-name></person-group>, &#x201C;<article-title>Mining graphs from travel blogs: A review in the context of tour planning</article-title>,&#x201D; <source>Information Technology &#x0026; Tourism</source>, vol. <volume>17</volume>, no. <issue>4</issue>, pp. <fpage>429</fpage>&#x2013;<lpage>453</lpage>, <year>2017</year>.</mixed-citation></ref>
<ref id="ref-12"><label>[12]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>B.</given-names> <surname>Bansal</surname></string-name> and <string-name><given-names>S.</given-names> <surname>Srivastava</surname></string-name></person-group>, &#x201C;<article-title>Hybrid attribute based sentiment classification of online reviews for consumer intelligence</article-title>,&#x201D; <source>Applied Intelligence</source>, vol. <volume>49</volume>, no. <issue>1</issue>, pp. <fpage>137</fpage>&#x2013;<lpage>149</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-13"><label>[13]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>J.</given-names> <surname>Tao</surname></string-name> and <string-name><given-names>L.</given-names> <surname>Zhou</surname></string-name></person-group>, &#x201C;<article-title>A weakly supervised WordNet-Guided deep learning approach to extracting aspect terms from online reviews</article-title>,&#x201D; <source>ACM Transactions on Management Information Systems</source>, vol. <volume>11</volume>, no. <issue>3</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>22</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-14"><label>[14]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>C.</given-names> <surname>Zucco</surname></string-name>, <string-name><given-names>B.</given-names> <surname>Calabrese</surname></string-name>, <string-name><given-names>G.</given-names> <surname>Agapito</surname></string-name>, <string-name><given-names>P. H.</given-names> <surname>Guzzi</surname></string-name> and <string-name><given-names>M.</given-names> <surname>Cannataro</surname></string-name></person-group>, &#x201C;<article-title>Sentiment analysis for mining texts and social networks data: Methods and tools</article-title>,&#x201D; <source>Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery</source>, vol. <volume>10</volume>, no. <issue>1</issue>, pp. <fpage>e1333</fpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-15"><label>[15]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>R.</given-names> <surname>Piryani</surname></string-name>, <string-name><given-names>D.</given-names> <surname>Madhavi</surname></string-name> and <string-name><given-names>V. K.</given-names> <surname>Singh</surname></string-name></person-group>, &#x201C;<article-title>Analytical mapping of opinion mining and sentiment analysis research during 2000&#x2013;2015</article-title>,&#x201D; <source>Information Processing &#x0026; Management</source>, vol. <volume>53</volume>, no. <issue>1</issue>, pp. <fpage>122</fpage>&#x2013;<lpage>150</lpage>, <year>2017</year>.</mixed-citation></ref>
<ref id="ref-16"><label>[16]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>L.</given-names> <surname>Tao</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Cao</surname></string-name> and <string-name><given-names>F.</given-names> <surname>Liu</surname></string-name></person-group>, &#x201C;<article-title>Quantifying textual terms of items for similarity measurement</article-title>,&#x201D; <source>Information Sciences</source>, vol. <volume>415</volume>, no. <issue>13</issue>, pp. <fpage>269</fpage>&#x2013;<lpage>282</lpage>, <year>2017</year>.</mixed-citation></ref>
<ref id="ref-17"><label>[17]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M.</given-names> <surname>Kang</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Ahn</surname></string-name> and <string-name><given-names>K.</given-names> <surname>Lee</surname></string-name></person-group>, &#x201C;<article-title>Opinion mining using ensemble text hidden Markov models for text classification</article-title>,&#x201D; <source>Expert Systems with Applications</source>, vol. <volume>94</volume>, no. <issue>6</issue>, pp. <fpage>218</fpage>&#x2013;<lpage>227</lpage>, <year>2018</year>.</mixed-citation></ref>
<ref id="ref-18"><label>[18]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>L.</given-names> <surname>Liu</surname></string-name>, <string-name><given-names>R. C.</given-names> <surname>Chen</surname></string-name>, <string-name><given-names>Q.</given-names> <surname>Zhao</surname></string-name> and <string-name><given-names>S.</given-names> <surname>Zhu</surname></string-name></person-group>, &#x201C;<article-title>Applying a multistage of input feature combination to random forest for improving MRT passenger flow prediction</article-title>,&#x201D; <source>Journal of Ambient Intelligence and Humanized Computing</source>, vol. <volume>10</volume>, no. <issue>11</issue>, pp. <fpage>4515</fpage>&#x2013;<lpage>4532</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-19"><label>[19]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A. F.</given-names> <surname>Subahi</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Alotaibi</surname></string-name>, <string-name><given-names>O. I.</given-names> <surname>Khalaf</surname></string-name> and <string-name><given-names>F.</given-names> <surname>Ajesh</surname></string-name></person-group>, &#x201C;<article-title>Packet drop battling mechanism for energy aware detection in wireless networks</article-title>,&#x201D; <source>Computers, Materials &#x0026; Continua</source>, vol. <volume>66</volume>, no. <issue>2</issue>, pp. <fpage>2077</fpage>&#x2013;<lpage>2086</lpage>, <year>2021</year>.</mixed-citation></ref>
<ref id="ref-20"><label>[20]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>H.</given-names> <surname>Zhang</surname></string-name>, <string-name><given-names>H.</given-names> <surname>Zhang</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Pirbhulal</surname></string-name>, <string-name><given-names>W.</given-names> <surname>Wu</surname></string-name> and <string-name><given-names>V. H. C. D.</given-names> <surname>Albuquerque</surname></string-name></person-group>, &#x201C;<article-title>Active balancing mechanism for imbalanced medical data in deep learning-based classification models</article-title>,&#x201D; <source>ACM Transactions on Multimedia Computing, Communications, and Applications</source>, vol. <volume>16</volume>, no. <issue>1s</issue>, pp. <fpage>1</fpage>&#x2013;<lpage>15</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-21"><label>[21]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>D.</given-names> <surname>Liciotti</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Bernardini</surname></string-name>, <string-name><given-names>L.</given-names> <surname>Romeo</surname></string-name> and <string-name><given-names>E.</given-names> <surname>Frontoni</surname></string-name></person-group>, &#x201C;<article-title>A sequential deep learning application for recognising human activities in smart homes</article-title>,&#x201D; <source>Neurocomputing</source>, vol. <volume>396</volume>, no. <issue>6</issue>, pp. <fpage>501</fpage>&#x2013;<lpage>513</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-22"><label>[22]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>R.</given-names> <surname>Arulmurugan</surname></string-name>, <string-name><given-names>K. R.</given-names> <surname>Sabarmathi</surname></string-name> and <string-name><given-names>H.</given-names> <surname>Anandakumar</surname></string-name></person-group>, &#x201C;<article-title>Classification of sentence level sentiment analysis using cloud machine learning techniques</article-title>,&#x201D; <source>Cluster Computing</source>, vol. <volume>22</volume>, no. <issue>1</issue>, pp. <fpage>1199</fpage>&#x2013;<lpage>1209</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-23"><label>[23]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>U.</given-names> <surname>Naseem</surname></string-name>, <string-name><given-names>I.</given-names> <surname>Razzak</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Musial</surname></string-name> and <string-name><given-names>M.</given-names> <surname>Imran</surname></string-name></person-group>, &#x201C;<article-title>Transformer based deep intelligent contextual embedding for Twitter sentiment analysis</article-title>,&#x201D; <source>Future Generation Computer Systems</source>, vol. <volume>113</volume>, no. <issue>2</issue>, pp. <fpage>58</fpage>&#x2013;<lpage>69</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-24"><label>[24]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>F.</given-names> <surname>Smarandache</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Colhon</surname></string-name>, <string-name><given-names>&#350;.</given-names> <surname>Vl&#x0103;du&#355;escu</surname></string-name> and <string-name><given-names>X.</given-names> <surname>Negrea</surname></string-name></person-group>, &#x201C;<article-title>Word-level neutrosophic sentiment similarity</article-title>,&#x201D; <source>Applied Soft Computing</source>, vol. <volume>80</volume>, no. <issue>1</issue>, pp. <fpage>167</fpage>&#x2013;<lpage>176</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-25"><label>[25]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>K.</given-names> <surname>Kowsari</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Jafari Meimandi</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Heidarysafa</surname></string-name>, <string-name><given-names>S.</given-names> <surname>Mendu</surname></string-name>, <string-name><given-names>L.</given-names> <surname>Barnes</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Text classification algorithms: A survey</article-title>,&#x201D; <source>Information-an International Interdisciplinary Journal</source>, vol. <volume>10</volume>, no. <issue>4</issue>, pp. <fpage>150</fpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-26"><label>[26]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>A.</given-names> <surname>Chatterjee</surname></string-name>, <string-name><given-names>U.</given-names> <surname>Gupta</surname></string-name>, <string-name><given-names>M. K.</given-names> <surname>Chinnakotla</surname></string-name>, <string-name><given-names>R.</given-names> <surname>Srikanth</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Galley</surname></string-name> <etal>et al.</etal></person-group><italic>,</italic> &#x201C;<article-title>Understanding emotions in text using deep learning and big data</article-title>,&#x201D; <source>Computers in Human Behavior</source>, vol. <volume>93</volume>, no. <issue>3</issue>, pp. <fpage>309</fpage>&#x2013;<lpage>317</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-27"><label>[27]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>H.</given-names> <surname>Wang</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Tian</surname></string-name>, <string-name><given-names>Z.</given-names> <surname>Wu</surname></string-name> and <string-name><given-names>L.</given-names> <surname>Wang</surname></string-name></person-group>, &#x201C;<article-title>A short text classification method based on convolutional neural network and semantic extension</article-title>,&#x201D; <source>International Journal of Computational Intelligence Systems</source>, vol. <volume>14</volume>, no. <issue>1</issue>, pp. <fpage>367</fpage>&#x2013;<lpage>375</lpage>, <year>2020</year>.</mixed-citation></ref>
<ref id="ref-28"><label>[28]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>W.</given-names> <surname>Liu</surname></string-name>, <string-name><given-names>P.</given-names> <surname>Liu</surname></string-name>, <string-name><given-names>Y.</given-names> <surname>Yang</surname></string-name>, <string-name><given-names>J.</given-names> <surname>Yi</surname></string-name> and <string-name><given-names>Z.</given-names> <surname>Zhu</surname></string-name></person-group>, &#x201C;<article-title>A &#x27E8;word, part of speech&#x27E9; embedding model for text classification</article-title>,&#x201D; <source>Expert Systems</source>, vol. <volume>36</volume>, no. <issue>6</issue>, pp. <fpage>e12460</fpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-29"><label>[29]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>M. A.</given-names> <surname>Khan</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Rashid</surname></string-name>, <string-name><given-names>M.</given-names> <surname>Sharif</surname></string-name>, <string-name><given-names>K.</given-names> <surname>Javed</surname></string-name> and <string-name><given-names>T.</given-names> <surname>Akram</surname></string-name></person-group>, &#x201C;<article-title>Classification of gastrointestinal diseases of stomach from WCE using improved saliency-based method and discriminant features selection</article-title>,&#x201D; <source>Multimedia Tools and Applications</source>, vol. <volume>78</volume>, no. <issue>19</issue>, pp. <fpage>27743</fpage>&#x2013;<lpage>27770</lpage>, <year>2019</year>.</mixed-citation></ref>
<ref id="ref-30"><label>[30]</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name><given-names>Y.</given-names> <surname>Alotaibi</surname></string-name></person-group>, &#x201C;<article-title>A new database intrusion detection approach based on hybrid meta-heuristics</article-title>,&#x201D; <source>Computers, Materials &#x0026; Continua</source>, vol. <volume>66</volume>, no. <issue>2</issue>, pp. <fpage>1879</fpage>&#x2013;<lpage>1895</lpage>, <year>2021</year>.</mixed-citation></ref>
</ref-list>
</back>
</article>