<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.1">
<front>
<journal-meta>
<journal-id journal-id-type="pmc">IASC</journal-id>
<journal-id journal-id-type="nlm-ta">IASC</journal-id>
<journal-id journal-id-type="publisher-id">IASC</journal-id>
<journal-title-group>
<journal-title>Intelligent Automation &#x0026; Soft Computing</journal-title>
</journal-title-group>
<issn pub-type="epub">2326-005X</issn>
<issn pub-type="ppub">1079-8587</issn>
<publisher>
<publisher-name>Tech Science Press</publisher-name>
<publisher-loc>USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">19067</article-id>
<article-id pub-id-type="doi">10.32604/iasc.2021.019067</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Performances of K-Means Clustering Algorithm with Different Distance Metrics</article-title>
<alt-title alt-title-type="left-running-head">Performances of K-Means Clustering Algorithm with Different Distance Metrics</alt-title>
<alt-title alt-title-type="right-running-head">Performances of K-Means Clustering Algorithm with Different Distance Metrics</alt-title>
</title-group>
<contrib-group content-type="authors">
<contrib id="author-1" contrib-type="author">
<name name-style="western">
<surname>Ghazal</surname>
<given-names>Taher M.</given-names>
</name>
<xref ref-type="aff" rid="aff-1">1</xref>
<xref ref-type="aff" rid="aff-2">2</xref>
</contrib>
<contrib id="author-2" contrib-type="author">
<name name-style="western">
<surname>Hussain</surname>
<given-names>Muhammad Zahid</given-names>
</name>
<xref ref-type="aff" rid="aff-3">3</xref>
</contrib>
<contrib id="author-3" contrib-type="author">
<name name-style="western">
<surname>Said</surname>
<given-names>Raed A.</given-names>
</name>
<xref ref-type="aff" rid="aff-5">5</xref>
</contrib>
<contrib id="author-4" contrib-type="author">
<name name-style="western">
<surname>Nadeem</surname>
<given-names>Afrozah</given-names>
</name>
<xref ref-type="aff" rid="aff-6">6</xref>
</contrib>
<contrib id="author-5" contrib-type="author">
<name name-style="western">
<surname>Hasan</surname>
<given-names>Mohammad Kamrul</given-names>
</name>
<xref ref-type="aff" rid="aff-1">1</xref>
</contrib>
<contrib id="author-6" contrib-type="author">
<name name-style="western">
<surname>Ahmad</surname>
<given-names>Munir</given-names>
</name>
<xref ref-type="aff" rid="aff-7">7</xref>
</contrib>
<contrib id="author-7" contrib-type="author" corresp="yes">
<name name-style="western">
<surname>Khan</surname>
<given-names>Muhammad Adnan</given-names>
</name>
<xref ref-type="aff" rid="aff-3">3</xref>
<xref ref-type="aff" rid="aff-4">4</xref>
<email>adnan.khan@riphah.edu.pk</email>
</contrib>
<contrib id="author-8" contrib-type="author">
<name name-style="western">
<surname>Naseem</surname>
<given-names>Muhammad Tahir</given-names>
</name>
<xref ref-type="aff" rid="aff-3">3</xref>
</contrib>
<aff id="aff-1">
<label>1</label><institution>Center for Cyber Security, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia (UKM)</institution>, <addr-line>43600, Bangi, Selangor</addr-line>, <country>Malaysia</country></aff>
<aff id="aff-2">
<label>2</label><institution>School of Information Technology, Skyline University College, University City Sharjah</institution>, <addr-line>1797, Sharjah</addr-line>, <country>UAE</country></aff>
<aff id="aff-3">
<label>3</label><institution>Riphah School of Computing &#x0026; Innovation, Faculty of Computing, Riphah International University</institution>, <addr-line>Lahore Campus, Lahore, 54000</addr-line>, <country>Pakistan</country></aff>
<aff id="aff-4">
<label>4</label><institution>Pattern Recognition and Machine Learning Lab, Department of Software Engineering, Gachon University</institution>, <addr-line>Seongnam, 13557</addr-line>, <country>South Korea</country></aff>
<aff id="aff-5">
<label>5</label><institution>Canadian University Dubai</institution>, <addr-line>Dubai</addr-line>, <country>UAE</country></aff>
<aff id="aff-6">
<label>6</label><institution>Department of Computer Science, Lahore Garrison University</institution>, <addr-line>Lahore, 54000</addr-line>, <country>Pakistan</country></aff>
<aff id="aff-7">
<label>7</label><institution>School of Computer Science, National College of Business Administration &#x0026; Economics</institution>, <addr-line>Lahore, 54000</addr-line>, <country>Pakistan</country></aff>
</contrib-group><author-notes><corresp id="cor1">&#x002A;Corresponding Author: Muhammad Adnan Khan. Email: <email>adnan.khan@riphah.edu.pk</email></corresp></author-notes>
<pub-date pub-type="epub" date-type="pub" iso-8601-date="2021-07-29">
<day>29</day>
<month>7</month>
<year>2021</year>
</pub-date>
<volume>30</volume>
<issue>2</issue>
<fpage>735</fpage>
<lpage>742</lpage>
<history>
<date date-type="received">
<day>31</day>
<month>3</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>5</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2021 Ghazal et al.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Ghazal et al.</copyright-holder>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This work is licensed under a <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="TSP_IASC_19067.pdf"></self-uri>
<abstract>
<p>Clustering is the process of grouping the data based on their similar properties. Meanwhile, it is the categorization of a set of data into similar groups (clusters), and the elements in each cluster share similarities, where the similarity between elements in the same cluster must be considerably greater than the similarity between elements of different clusters. Hence, this similarity can be considered as a distance measure. One of the most popular clustering algorithms is K-means, where distance is measured between every point of the dataset and centroids of clusters to find similar data objects and assign them to the nearest cluster. Further, there are a series of distance metrics that can be applied to calculate point-to-point distances. In this research, the K-means clustering algorithm is evaluated with three different mathematical metrics in terms of execution time with different datasets and different numbers of clusters. The results indicate that the implementation of the Manhattan distance measure metric achieves the best results in most cases. These results also demonstrate that distance metrics can affect the execution time and the number of clusters created by the K-means algorithm.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>K-means clustering</kwd>
<kwd>distance metrics</kwd>
<kwd>Euclidean distance</kwd>
<kwd>Manhattan distance</kwd>
<kwd>Minkowski distance</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction</title>
<p>Clustering is the process of grouping data based on their similar properties. All the elements in each cluster should be similar [<xref ref-type="bibr" rid="ref-1">1</xref>]. The types of clustering include data mining algorithmic clustering, dimension reduction, parallel clustering, and MapReduce-based clustering [<xref ref-type="bibr" rid="ref-2">2</xref>]. Meanwhile, partitioned clustering is a type of data mining algorithmic clustering integrating different algorithms like K-means, K-modes, K-medoids, PAM, CLARA, CLARANS, and FCM [<xref ref-type="bibr" rid="ref-3">3</xref>].</p>
<p>One of the widely used algorithms for clustering implementation is the K-means clustering algorithm [<xref ref-type="bibr" rid="ref-4">4</xref>], whose usage is very common due to the best performance for big datasets [<xref ref-type="bibr" rid="ref-4">4</xref>,<xref ref-type="bibr" rid="ref-5">5</xref>]. In the standard K-means algorithm, K points are firstly selected as initial centroids, where each centroid represents a cluster. Then all objects of the dataset are assigned to the centroids with the minimum distances. After the allocation of all data items, centroids are recalculated until no further objects change their cluster [<xref ref-type="bibr" rid="ref-6">6</xref>]. Generally, Euclidean distance is utilized for this purpose in most cases. However, the allocation may take maximum time because the distance needs to be recalculated with the mathematical equation during each iteration. Therefore, many mathematical metrics are proposed to improve the distance calculation [<xref ref-type="bibr" rid="ref-7">7</xref>].</p>
<p>Mathematical distance measure metrics play an essential role in improving the result of the K-means algorithm. Thus, three distance metrics, i.e., Euclidean distance, Manhattan distance, and Minkowski distance, are implemented in this study. Besides, the execution time with different cluster numbers is evaluated on different datasets, where 100000, 200000, 300000, 400000 and 500000 2D points are randomly selected as datasets.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Literature Review</title>
<p>Many researchers have improved the efficiency and the performance of the K-means clustering algorithm, including the accuracy, the quality of clusters, and the running time of the K-means algorithm [<xref ref-type="bibr" rid="ref-8">8</xref>,<xref ref-type="bibr" rid="ref-9">9</xref>,<xref ref-type="bibr" rid="ref-10">10</xref>]. Kaur et al. [<xref ref-type="bibr" rid="ref-11">11</xref>] presented an improved variant of standard K-means, which provided the image compression with less running time and more efficiency. Dalal et al. [<xref ref-type="bibr" rid="ref-12">12</xref>] introduced an enhanced version of the K-means algorithm to better select starting points so as to reach an improved local minimum. Over the complete dataset, the number of repetitions also decreased. Two things influenced the idea, which was dependent upon the best choice of initial centroids. The first one is the novel iterative method, and the second one is optimization formulation. This technique may be implemented on many clustering problems. The technique was also capable of working with many other data mining techniques to obtain the best clustering results. To evaluate the improved algorithm, different experiments were performed on different datasets. As compared to the standard K-means algorithm, the iterations of the proposed K-means clustering algorithm were fewer, achieving the best performance.</p>
<p>To overcome the drawbacks of the standard K-means clustering algorithm, Gupta et al. [<xref ref-type="bibr" rid="ref-13">13</xref>] presented an improved algorithm without specifying the number of centroids.</p>
<p>There are different clustering types of data mining algorithms like density-based clustering algorithms, hierarchical-based clustering algorithms, partitioning-based clustering algorithms, grid-based clustering algorithms, and model-based clustering algorithms [<xref ref-type="bibr" rid="ref-14">14</xref>]. In partition-based clustering, one of the famous algorithms is the K-means algorithm [<xref ref-type="bibr" rid="ref-14">14</xref>], which first generates a K number of partitions representing the number of groups and then conducts the iterative allocation process of data elements to the group [<xref ref-type="bibr" rid="ref-6">6</xref>].</p>
<p>Bora et al. proposed an experimental study in Matlab to cluster the iris and wine datasets with different distance measures and observed the variation of the performances [<xref ref-type="bibr" rid="ref-15">15</xref>]. Loohach et al. implemented the K-means clustering algorithm with Euclidean distance as well as Manhattan distance metrics and compared the result in terms of the number of iterations. Their results showed that the number of iterations could be affected by the implementation of different distance metrics [<xref ref-type="bibr" rid="ref-16">16</xref>]. Sajana et al. [<xref ref-type="bibr" rid="ref-3">3</xref>] focused on a keen study of different clustering algorithms, highlighting the characteristics of big data techniques and an overview of various types of clustering. Rathore et al. [<xref ref-type="bibr" rid="ref-17">17</xref>] introduced a new technique to implement a K-means clustering algorithm instead of traditional K-means. First, the quality of clusters was improved by removing outlier elements in a dataset; second, the dataset was split into clusters by using a bi-part method. The results were compared with the traditional K-means algorithm and showed better accuracy by removing the deficiencies.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Distance Metrics</title>
<p>To find a point-to-point distance between elements and centroid, different distance metrics that play an important role in K-means clustering are measured to assign these elements to related clusters (i.e., centroids). Three distance metrics are implemented and discussed as follows.</p>
<sec id="s3_1">
<label>3.1</label>
<title>Euclidean Distance</title>
<p>Euclidean distance or Euclidean metric is the familiar straight-line distance between two elements, or the minimum distance between two objects [<xref ref-type="bibr" rid="ref-18">18</xref>], which is the clearest way of representing the distance between two points. If points (x1, y1) and (x2, y2) are in 2-dimensional space, then the Euclidean distance <inline-formula id="ieqn-1">
<!--<alternatives><inline-graphic xlink:href="ieqn-1.tif"/><tex-math id="tex-ieqn-1"><![CDATA[$d$]]></tex-math>--><mml:math id="mml-ieqn-1"><mml:mi>d</mml:mi></mml:math>
<!--</alternatives>--></inline-formula> between them is</p>
<p><disp-formula id="eqn-1">
<label>(1)</label>
<!--<alternatives>
<graphic mimetype="image" mime-subtype="png" xlink:href="eqn-1.png"/><tex-math id="tex-eqn-1"><![CDATA[$$d = \sqrt {{{\left( {{x_2} - {x_1}} \right)}^2} + {{\left( {{y_2} - {y_1}} \right)}^2}}$$]]></tex-math>--><mml:math id="mml-eqn-1" display="block"><mml:mi>d</mml:mi><mml:mo>&#x003D;</mml:mo><mml:msqrt><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mo>&#x002B;</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:msqrt></mml:math>
<!--</alternatives>--></disp-formula></p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Manhattan Distance</title>
<p>In the Manhattan distance function [<xref ref-type="bibr" rid="ref-15">15</xref>], the distance between two points is the sum of the absolute differences of their Cartesian coordinates. Simply it is the sum of the difference between the x-coordinates and y-coordinates. Thus, the Manhattan distance <inline-formula id="ieqn-2">
<!--<alternatives><inline-graphic xlink:href="ieqn-2.tif"/><tex-math id="tex-ieqn-2"><![CDATA[$d\left( {x,y} \right)$]]></tex-math>--><mml:math id="mml-ieqn-2"><mml:mi>d</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math>
<!--</alternatives>--></inline-formula> can be defined as</p>
<p><disp-formula id="eqn-2">
<label>(2)</label>
<!--<alternatives>
<graphic mimetype="image" mime-subtype="png" xlink:href="eqn-2.png"/><tex-math id="tex-eqn-2"><![CDATA[$$d\left( {x,y} \right) = \sum\nolimits_{i = 1}^k {\left| {{x_i} - {y_i}} \right|}$$]]></tex-math>--><mml:math id="mml-eqn-2" display="block"><mml:mi>d</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x003D;</mml:mo><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x003D;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:msubsup><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:math>
<!--</alternatives>--></disp-formula></p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Minkowski Distance</title>
<p>Minkowski distance [<xref ref-type="bibr" rid="ref-19">19</xref>] is described as a generalization of two metrics: the Euclidean distance metric and the Manhattan distance metric. The formula to calculate Minkowski distance <inline-formula id="ieqn-3">
<!--<alternatives><inline-graphic xlink:href="ieqn-3.tif"/><tex-math id="tex-ieqn-3"><![CDATA[$D\left( {x,\; y} \right)$]]></tex-math>--><mml:math id="mml-ieqn-3"><mml:mi>D</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thickmathspace"></mml:mspace><mml:mi>y</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math>
<!--</alternatives>--></inline-formula> is given as follows:</p>
<p><disp-formula id="eqn-3">
<label>(3)</label>
<!--<alternatives>
<graphic mimetype="image" mime-subtype="png" xlink:href="eqn-3.png"/><tex-math id="tex-eqn-3"><![CDATA[$$D\left( {x,y} \right) = {\left( {\sum\nolimits_{i = 1}^k {{{\left( {\left| {{x_i} - {y_i}} \right|} \right)}^q}} } \right)^{1/q}}$$]]></tex-math>--><mml:math id="mml-eqn-3" display="block"><mml:mi>D</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x003D;</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x003D;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:msubsup><mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mi>q</mml:mi></mml:msup></mml:mrow></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mi>q</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math>
<!--</alternatives>--></disp-formula></p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Methodology</title>
<p>We compared execution time with different numbers of clusters on different datasets. Datasets are randomly selected such as 100000, 200000, 300000, 400000, and 500000 in 2D points. Different distance metrics are implemented to measure the distance between data objects. In this paper, the K-means algorithm is employed by using different distance metrics, whose mechanisms are summarized as follows.</p>
<sec id="s4_1">
<label>4.1</label>
<title>Euclidean Distance Algorithm</title>
<p>In random 2D dataset points:<list list-type="simple"><list-item>
<p>a. Select K number of clusters.</p></list-item><list-item>
<p>b. Select randomly initial centroid points.</p></list-item><list-item>
<p>c. Compute the distance with the Euclidean distance metric of each point from selected cluster centers.</p></list-item></list></p>
<p>Steps of Euclidean distance metric:<list list-type="simple"><list-item>
<p><list list-type="simple"><list-item>
<p>I. Dist1 &#x003D; [ (points - centroid)<sup>2</sup> ]</p></list-item><list-item>
<p>II. Dist &#x003D; math.sqrt(sum(Dist1))</p></list-item><list-item>
<p>III. return Dist</p></list-item></list></p></list-item><list-item>
<p>d. Grouping based on the minimum distance.</p></list-item><list-item>
<p>e. If no data points need to be moved, then stop; otherwise, repeat Steps c &#x0026; d.</p></list-item></list></p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Manhattan Distance Algorithm</title>
<p>In random 2D dataset points:<list list-type="simple"><list-item>
<p>a. Select K number of clusters.</p></list-item><list-item>
<p>b. Select randomly initial centroid points.</p></list-item><list-item>
<p>c. Compute the distance with the Manhattan distance metric of each point from selected cluster centers.</p></list-item></list></p>
<p>Steps of Manhattan distance metric:<list list-type="simple"><list-item>
<p><list list-type="simple"><list-item>
<p>I. Dist1 &#x003D; [points &#x2013; centroid]</p></list-item><list-item>
<p>II. Dist &#x003D; sum(abs(Dist1))</p></list-item><list-item>
<p>III. return Dist</p></list-item></list></p></list-item><list-item>
<p>d. Grouping based on minimum distance.</p></list-item><list-item>
<p>e. If no data points need to be moved, then stop; otherwise, repeat Steps c &#x0026; d.</p></list-item></list></p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Minkowski Distance Algorithm</title>
<p>In random 2D dataset points:<list list-type="simple"><list-item>
<p>a. Select K number of clusters.</p></list-item><list-item>
<p>b. Select randomly initial centroid points.</p></list-item><list-item>
<p>c. Compute the distance with the Minkowski distance metric of each point from selected cluster centers.</p></list-item></list></p>
<p>Steps of Minkowski distance metric:<list list-type="simple"><list-item>
<p><list list-type="simple"><list-item>
<p>I. Dist1 &#x003D; [(points - centroid)<sup>n</sup>]<sup>1/n</sup></p></list-item><list-item>
<p>II. Dist &#x003D; sum(abs(Dist1))</p></list-item><list-item>
<p>III. return Dist</p></list-item></list></p></list-item><list-item>
<p>d. Grouping based on minimum distance.</p></list-item><list-item>
<p>e. If no data points need to be moved, then stop; otherwise, repeat Steps c &#x0026; d.</p></list-item></list></p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Experimental Results</title>
<p>In the experiments, Spyder 3.2.8 was implemented, which is a scientific python development environment and powerful python IDE. All experiments were conducted on a machine consisting of an Intel (R) Core (TM) i5-5300 CPU @ 2.30 GHz with 8 GB RAM. The results were evaluated on different numbers of clusters, such as 4, 6, 8, 10, 12, 14, and 16, using five different datasets. To achieve perfect results, many runs were carried out for each use case, and the running times were measured in milliseconds.</p>
<p>In <xref ref-type="fig" rid="fig-1">Fig. 1</xref>, the running time is compared with three different mathematical methods on four clusters. Different datasets were split into four clusters. Running time is shown along the y-axis in milliseconds, and the compared different datasets of three different distance metrics are shown along the x-axis.</p>
<fig id="fig-1">
<label>Figure 1</label>
<caption>
<title>Four Clusters, Running time of K-means algorithm with three different distance metrics</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="IASC_19067-fig-1.png"/>
</fig>
<p>In <xref ref-type="fig" rid="fig-2">Fig. 2</xref>, the running time of the K-means algorithm is compared with three different distance metrics, i.e., Euclidean, Manhattan, and Minkowski. It is observed that the Euclidean and Minkowski methods take the same time at 500,000 dataset points. But with fewer points, the Euclidean distance metric achieves better results as compared to other metrics.</p>
<fig id="fig-2">
<label>Figure 2</label>
<caption>
<title>Six Clusters, Running time of k-means algorithm with three different distance metrics</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="IASC_19067-fig-2.png"/>
</fig>
<p>In <xref ref-type="fig" rid="fig-3">Figs. 3</xref>&#x2013;<xref ref-type="fig" rid="fig-7">7</xref>, the running time is compared with three different distance metrics, where different sizes of datasets were split into 8, 10, 12, 14, and 16 clusters, respectively. Running time is shown along the y-axis in milliseconds, and the compared different datasets of three different distance metrics are shown along the x-axis. It can be seen from experiments that Manhattan Distance performs better for 4, 8, 12, and 14 clusters. Euclidean Distance shows better for 6 and 16 clusters, while Minkowski Distance performs better only for 10 clusters.</p>
<fig id="fig-3">
<label>Figure 3</label>
<caption>
<title>Eight Clusters, Running time of k-means algorithm with three different distance metrics</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="IASC_19067-fig-3.png"/>
</fig>
<fig id="fig-4">
<label>Figure 4</label>
<caption>
<title>Ten Clusters, Running time of k-means algorithm with three different distance metrics</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="IASC_19067-fig-4.png"/>
</fig>
<fig id="fig-5">
<label>Figure 5</label>
<caption>
<title>Twelve Clusters, Running time of k-means algorithm with three different distance metrics</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="IASC_19067-fig-5.png"/>
</fig>
<fig id="fig-6">
<label>Figure 6</label>
<caption>
<title>Fourteen Clusters, Running time of k-means algorithm with three different mathematical models</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="IASC_19067-fig-6.png"/>
</fig>
<fig id="fig-7">
<label>Figure 7</label>
<caption>
<title>Sixteen Clusters, Running time of k-means algorithm with three different distance metrics</title>
</caption>
<graphic mimetype="image" mime-subtype="png" xlink:href="IASC_19067-fig-7.png"/>
</fig>
</sec>
<sec id="s6">
<label>6</label>
<title>Conclusions</title>
<p>One of the most popular clustering algorithms is K-means, where different distance metrics are used to find similar data objects. Distance is measured between every point of the dataset and centroids to assign the nearest cluster. In the experiments, the performances of three different metrics (Minkowski Distance, Manhattan Distance, and Euclidean Distance) were measured and compared in terms of execution time with different datasets and different numbers of clusters, i.e., 4, 6, 8, 10, 12, 14, and 16 clusters. It can be seen from experiments that Manhattan Distance performs better for 4, 8, 12, and 14 clusters. Euclidean Distance shows better for 6 and 16 clusters, while Minkowski Distance performs better only for 10 clusters. Overall, Manhattan Distance performs best. In future work, we will try to extend our approach to other partition-based clustering algorithms like K-Medoids, CLARA, and CLARANS.</p>
</sec>
</body>
<back>
<ack>
<p>Thanks to our families and colleagues, who provided moral support. We appreciate the linguistic assistance provided by TopEdit (www.topeditsci.com) during the preparation of this manuscript.</p>
</ack><fn-group>
<fn fn-type="other">
<p><bold>Funding Statement:</bold> The authors received no specific funding for this study.</p>
</fn>
<fn fn-type="conflict">
<p><bold>Conflicts of Interest:</bold> The authors declare that they have no conflicts of interest to report regarding the present study.</p>
</fn>
</fn-group>
<ref-list content-type="authoryear">
<title>References</title>
<ref id="ref-1">
<label>1</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>H.</given-names> 
<surname>Rehioui</surname></string-name>, <string-name>
<given-names>A.</given-names> 
<surname>Idrissi</surname></string-name>, <string-name>
<given-names>M.</given-names> 
<surname>Abourezq</surname></string-name> and <string-name>
<given-names>F.</given-names> 
<surname>Zegrari</surname></string-name>
</person-group>, &#x201C;
<article-title>Denclue-im: a new approach for big data clustering</article-title>,&#x201D; 
<source>Procedia Computer Science</source>, vol. 
<volume>83</volume>, pp. 
<fpage>560</fpage>&#x2013;
<lpage>567</lpage>, 
<year>2016</year>.</mixed-citation>
</ref>
<ref id="ref-2">
<label>2</label><mixed-citation publication-type="conf-proc">
<person-group person-group-type="author"><string-name>
<given-names>B.</given-names> 
<surname>Zerhari</surname></string-name>, <string-name>
<given-names>A. A.</given-names> 
<surname>Lahcen</surname></string-name> and <string-name>
<given-names>S.</given-names> 
<surname>Mouline</surname></string-name>
</person-group>, &#x201C;
<article-title>Big data clustering: Algorithms and challenges</article-title>,&#x201D; in <conf-name>International Conference on Big Data, Cloud and Applications</conf-name>, 
<publisher-loc>Tetuan, Morocco</publisher-loc>, pp. 
<fpage>1</fpage>&#x2013;
<lpage>8</lpage>, 
<year>2015</year>. </mixed-citation>
</ref>
<ref id="ref-3">
<label>3</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>T.</given-names> 
<surname>Sajana</surname></string-name>, <string-name>
<given-names>C. S.</given-names> 
<surname>Rani</surname></string-name> and <string-name>
<given-names>K. V.</given-names> 
<surname>Narayana</surname></string-name>
</person-group>, &#x201C;
<article-title>A survey on clustering techniques for big data mining</article-title>,&#x201D; 
<source>Indian Journal of Science and Technology</source>, vol. 
<volume>9</volume>, no. 
<issue>3</issue>, pp. 
<fpage>10</fpage>&#x2013;
<lpage>16</lpage>, 
<year>2016</year>.</mixed-citation>
</ref>
<ref id="ref-4">
<label>4</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>M.</given-names> 
<surname>Wu</surname></string-name>, <string-name>
<given-names>X.</given-names> 
<surname>Li</surname></string-name>, <string-name>
<given-names>C.</given-names> 
<surname>Liu</surname></string-name>, <string-name>
<given-names>M.</given-names> 
<surname>Liu</surname></string-name>, <string-name>
<given-names>N.</given-names> 
<surname>Zhao</surname></string-name> <etal>et al.</etal>
</person-group><italic>,</italic> &#x201C;
<article-title>Robust global motion estimation for video security based on improved k-means clustering</article-title>,&#x201D; 
<source>Journal of Ambient Intelligence and Humanized Computing</source>, vol. 
<volume>10</volume>, no. 
<issue>2</issue>, pp. 
<fpage>439</fpage>&#x2013;
<lpage>448</lpage>, 
<year>2019</year>.</mixed-citation>
</ref>
<ref id="ref-5">
<label>5</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>R.</given-names> 
<surname>Jothi</surname></string-name>, <string-name>
<given-names>S. K.</given-names> 
<surname>Mohanty</surname></string-name> and <string-name>
<given-names>A.</given-names> 
<surname>Ojha</surname></string-name>
</person-group>, &#x201C;
<article-title>Dk-means: a deterministic k-means clustering algorithm for gene expression analysis</article-title>,&#x201D; 
<source>Pattern Analysis and Applications</source>, vol. 
<volume>22</volume>, no. 
<issue>2</issue>, pp. 
<fpage>649</fpage>&#x2013;
<lpage>667</lpage>, 
<year>2019</year>.</mixed-citation>
</ref>
<ref id="ref-6">
<label>6</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>T.</given-names> 
<surname>Velmurugan</surname></string-name> and <string-name>
<given-names>T.</given-names> 
<surname>Santhanam</surname></string-name>
</person-group>, &#x201C;
<article-title>A survey of partition-based clustering algorithms in data mining: an experimental approach</article-title>,&#x201D; 
<source>Information Technology Journal</source>, vol. 
<volume>10</volume>, no. 
<issue>3</issue>, pp. 
<fpage>478</fpage>&#x2013;
<lpage>484</lpage>, 
<year>2011</year>.</mixed-citation>
</ref>
<ref id="ref-7">
<label>7</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>M. K.</given-names> 
<surname>Arzoo</surname></string-name> and <string-name>
<given-names>K.</given-names> 
<surname>Rathod</surname></string-name>
</person-group>, &#x201C;
<article-title>K-means algorithm with different distance metrics in spatial data mining with uses of netbeans ide 8.2</article-title>,&#x201D; 
<source>International Research Journal of Engineering and Technology</source>, vol. 
<volume>4</volume>, no. 
<issue>4</issue>, pp. 
<fpage>2363</fpage>&#x2013;
<lpage>2368</lpage>, 
<year>2017</year>.</mixed-citation>
</ref>
<ref id="ref-8">
<label>8</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>G.</given-names> 
<surname>Tzortzis</surname></string-name> and <string-name>
<given-names>A.</given-names> 
<surname>Likas</surname></string-name>
</person-group>, &#x201C;
<article-title>The min-max k-means clustering algorithm</article-title>,&#x201D; 
<source>Pattern Recognition</source>, vol. 
<volume>47</volume>, no. 
<issue>7</issue>, pp. 
<fpage>2505</fpage>&#x2013;
<lpage>2516</lpage>, 
<year>2014</year>.</mixed-citation>
</ref>
<ref id="ref-9">
<label>9</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>F. U.</given-names> 
<surname>Siddiqui</surname></string-name> and <string-name>
<given-names>N. M.</given-names> 
<surname>Isa</surname></string-name>
</person-group>, &#x201C;
<article-title>Optimized k-means clustering algorithm for image segmentation</article-title>,&#x201D; 
<source>Opto-Electronics Review</source>, vol. 
<volume>20</volume>, no. 
<issue>3</issue>, pp. 
<fpage>216</fpage>&#x2013;
<lpage>225</lpage>, 
<year>2012</year>.</mixed-citation>
</ref>
<ref id="ref-10">
<label>10</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>M. E.</given-names> 
<surname>Celebi</surname></string-name>, <string-name>
<given-names>H. A.</given-names> 
<surname>Kingravi</surname></string-name> and <string-name>
<given-names>P. A.</given-names> 
<surname>Vela</surname></string-name>
</person-group>, &#x201C;
<article-title>A comparative study of efficient initialization methods for the k-means clustering algorithm</article-title>,&#x201D; 
<source>Expert Systems with Applications</source>, vol. 
<volume>40</volume>, no. 
<issue>1</issue>, pp. 
<fpage>200</fpage>&#x2013;
<lpage>210</lpage>, 
<year>2013</year>.</mixed-citation>
</ref>
<ref id="ref-11">
<label>11</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>H.</given-names> 
<surname>Kaur</surname></string-name> and <string-name>
<given-names>J. K.</given-names> 
<surname>Sahiwal</surname></string-name>
</person-group>, &#x201C;
<article-title>Image compression with an improved k-means algorithm for performance enhancement</article-title>,&#x201D; 
<source>International Journal of Computer Science and Management Research</source>, vol. 
<volume>2</volume>, no. 
<issue>6</issue>, pp. 
<fpage>1</fpage>&#x2013;
<lpage>8</lpage>, 
<year>2016</year>.</mixed-citation>
</ref>
<ref id="ref-12">
<label>12</label><mixed-citation publication-type="conf-proc">
<person-group person-group-type="author"><string-name>
<given-names>M. A. D. N. D.</given-names> 
<surname>Harale</surname></string-name> and <string-name>
<given-names>U. L.</given-names> 
<surname>Kulkarni</surname></string-name>
</person-group>, &#x201C;
<article-title>An iterative improved k-means clustering</article-title>,&#x201D; in <conf-name>International Conference on Advances in Computer Engineering</conf-name>, 
<publisher-loc>Kerala, India</publisher-loc>, pp. 
<fpage>25</fpage>&#x2013;
<lpage>28</lpage>, 
<year>2011</year>. </mixed-citation>
</ref>
<ref id="ref-13">
<label>13</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>M.</given-names> 
<surname>Sakthi</surname></string-name> and <string-name>
<given-names>S. T.</given-names> 
<surname>Antony</surname></string-name>
</person-group>, &#x201C;
<article-title>An effective determination of initial centroids in k-means clustering using kernel PCA</article-title>,&#x201D; 
<source>International Journal of Computer Science and Information Technologies</source>, vol. 
<volume>2</volume>, no. 
<issue>3</issue>, pp. 
<fpage>955</fpage>&#x2013;
<lpage>959</lpage>, 
<year>2011</year>.</mixed-citation>
</ref>
<ref id="ref-14">
<label>14</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>A.</given-names> 
<surname>Fahad</surname></string-name>, <string-name>
<given-names>N.</given-names> 
<surname>Alshatri</surname></string-name>, <string-name>
<given-names>Z.</given-names> 
<surname>Tari</surname></string-name>, <string-name>
<given-names>A.</given-names> 
<surname>Alamri</surname></string-name>, <string-name>
<given-names>I.</given-names> 
<surname>Khalil</surname></string-name> <etal>et al.</etal>
</person-group>, &#x201C;
<article-title>A survey of clustering algorithms for big data: taxonomy and empirical analysis</article-title>,&#x201D; 
<source>IEEE Transactions on Emerging Topics in Computing</source>, vol. 
<volume>2</volume>, no. 
<issue>3</issue>, pp. 
<fpage>267</fpage>&#x2013;
<lpage>279</lpage>, 
<year>2014</year>.</mixed-citation>
</ref>
<ref id="ref-15">
<label>15</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>M.</given-names> 
<surname>Bora</surname></string-name>, <string-name>
<given-names>D.</given-names> 
<surname>Jyoti</surname></string-name>, <string-name>
<given-names>D.</given-names> 
<surname>Gupta</surname></string-name> and <string-name>
<given-names>A.</given-names> 
<surname>Kumar</surname></string-name>
</person-group>, &#x201C;
<article-title>Effect of different distance measures on the performance of k-means algorithm: an experimental study in matlab</article-title>,&#x201D; 
<source>ArXiv Preprint ArXiv</source>, vol. 
<volume>2014</volume>, pp. 
<fpage>1</fpage>&#x2013;
<lpage>9</lpage>, 
<year>2014</year>.</mixed-citation>
</ref>
<ref id="ref-16">
<label>16</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>R.</given-names> 
<surname>Loohach</surname></string-name> and <string-name>
<given-names>K.</given-names> 
<surname>Garg</surname></string-name>
</person-group>, &#x201C;
<article-title>Effect of distance functions on k-means clustering algorithm</article-title>,&#x201D; 
<source>International Journal of Computer Applications</source>, vol. 
<volume>50</volume>, no. 
<issue>1</issue>, pp. 
<fpage>1</fpage>&#x2013;
<lpage>8</lpage>, 
<year>2012</year>.</mixed-citation>
</ref>
<ref id="ref-17">
<label>17</label><mixed-citation publication-type="conf-proc">
<person-group person-group-type="author"><string-name>
<given-names>P.</given-names> 
<surname>Rathore</surname></string-name> and <string-name>
<given-names>D.</given-names> 
<surname>Shukla</surname></string-name>
</person-group>, &#x201C;
<article-title>Analysis and performance improvement of k-means clustering in the big data environment</article-title>,&#x201D; in <conf-name>IEEE International Conference on Communication Networks</conf-name>, 
<publisher-loc>London</publisher-loc>, pp. 
<fpage>43</fpage>&#x2013;
<lpage>46</lpage>, 
<year>2015</year>. </mixed-citation>
</ref>
<ref id="ref-18">
<label>18</label><mixed-citation publication-type="journal">
<person-group person-group-type="author"><string-name>
<given-names>S.</given-names> 
<surname>Saqib</surname></string-name>, <string-name>
<given-names>A.</given-names> 
<surname>Ditta</surname></string-name>, <string-name>
<given-names>M. A.</given-names> 
<surname>Khan</surname></string-name>, <string-name>
<given-names>S. A. R.</given-names> 
<surname>Kazmi</surname></string-name>, <string-name>
<given-names>H.</given-names> 
<surname>Alquhayz</surname></string-name> <etal>et al.</etal>
</person-group>, &#x201C;
<article-title>Intelligent dynamic gesture recognition using cnn empowered by edit distance</article-title>,&#x201D; 
<source>Computers Materials &#x0026; Continua</source>, vol. 
<volume>66</volume>, no. 
<issue>2</issue>, pp. 
<fpage>2061</fpage>&#x2013;
<lpage>2076</lpage>, 
<year>2021</year>.</mixed-citation>
</ref>
<ref id="ref-19">
<label>19</label><mixed-citation publication-type="conf-proc">
<person-group person-group-type="author"><string-name>
<given-names>B. S.</given-names> 
<surname>Charulatha</surname></string-name>, <string-name>
<given-names>P.</given-names> 
<surname>Rodrigues</surname></string-name>, <string-name>
<given-names>T.</given-names> 
<surname>Chitralekha</surname></string-name> and <string-name>
<given-names>A.</given-names> 
<surname>Rajaraman</surname></string-name>
</person-group>, &#x201C;
<article-title>A comparative study of different distance metrics that can be used in fuzzy clustering algorithms</article-title>,&#x201D; in <conf-name>National Conference on Architecture, Software Systems and Green Computing</conf-name>, 
<publisher-loc>Tamil Nadu, India</publisher-loc>, pp. 
<fpage>1</fpage>&#x2013;
<lpage>9</lpage>, 
<year>2013</year>. </mixed-citation>
</ref>
</ref-list>
</back>
</article>