diff --git a/Paper_Latex/bare_conf_CS-beta1.1.aux b/Paper_Latex/bare_conf_CS-beta1.1.aux index bb78e1c..eb24df8 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.aux +++ b/Paper_Latex/bare_conf_CS-beta1.1.aux @@ -18,11 +18,13 @@ \citation{analysis} \citation{analysis} \citation{testing} +\citation{Marcus04aninformation} +\citation{Asuncion:2010} +\citation{Linstead:2009} \citation{labeledlda} +\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}} \citation{labeledlda} \citation{stmt} -\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}} -\@writefile{toc}{\contentsline {section}{\numberline {II}Background}{1}{section.2}} \citation{Marcus04aninformation} \citation{Asuncion:2010} \citation{Linstead:2009} @@ -31,6 +33,7 @@ \citation{Hindle} \citation{MSRChallenge2012} \citation{Hindle2011} +\@writefile{toc}{\contentsline {section}{\numberline {II}Background}{2}{section.2}} \@writefile{toc}{\contentsline {section}{\numberline {III}Related Work}{2}{section.3}} \@writefile{toc}{\contentsline {section}{\numberline {IV}Methodology}{2}{section.4}} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}Generating the data}{2}{subsection.4.1}} @@ -55,44 +58,35 @@ \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Number of bugs with the major version of Android for HTC and Motorola}}{5}{figure.1}} \newlabel{bugovertime}{{1}{5}{Number of bugs with the major version of Android for HTC and Motorola\relax }{figure.1}{}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}1}Common Troubled Topic}{5}{subsubsection.5.2.1}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}2}Common Improved Topic}{5}{subsubsection.5.2.2}} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Common Troubled Topics in HTC and Motorola}}{6}{figure.2}} \newlabel{commontopic}{{2}{6}{Common Troubled Topics in 
HTC and Motorola\relax }{figure.2}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Common Improved Topics in HTC and Motorola}}{6}{figure.3}} -\newlabel{fixtopic}{{3}{6}{Common Improved Topics in HTC and Motorola\relax }{figure.3}{}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}3}Unique Topics}{7}{subsubsection.5.2.3}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}2}Common Improved Topic}{6}{subsubsection.5.2.2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}3}Unique Topic}{6}{subsubsection.5.2.3}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Common Improved Topics in HTC and Motorola}}{7}{figure.3}} +\newlabel{fixtopic}{{3}{7}{Common Improved Topics in HTC and Motorola\relax }{figure.3}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Unique Topics relevance in HTC}}{7}{figure.4}} +\newlabel{uniquehtc}{{4}{7}{Unique Topics relevance in HTC\relax }{figure.4}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Unique Topics relevance in Motorola}}{7}{figure.5}} +\newlabel{uniquemoto}{{5}{7}{Unique Topics relevance in Motorola\relax }{figure.5}{}} \@writefile{toc}{\contentsline {section}{\numberline {VI}Discussion of Fragmentation}{7}{section.6}} -\@writefile{toc}{\contentsline {section}{\numberline {VII}Comparing of LDA and Labeled-LDA}{7}{section.7}} \citation{MSRChallenge2012} -\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Unique Topics relevance in HTC}}{8}{figure.4}} -\newlabel{uniquehtc}{{4}{8}{Unique Topics relevance in HTC\relax }{figure.4}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Unique Topics relevance in Motorola}}{8}{figure.5}} -\newlabel{uniquemoto}{{5}{8}{Unique Topics relevance in Motorola\relax }{figure.5}{}} -\@writefile{lof}{\contentsline {figure}{\numberline 
{8}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.}}{8}{figure.8}} -\newlabel{bughtc}{{8}{8}{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports}{figure.8}{}} +\@writefile{toc}{\contentsline {section}{\numberline {VII}Comparing of LDA and Labeled-LDA}{8}{section.7}} +\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.}}{8}{figure.8}} +\newlabel{bughtc}{{8}{8}{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports}{figure.8}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.}}{8}{figure.9}} +\newlabel{bugmoto}{{9}{8}{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports}{figure.9}{}} \@writefile{toc}{\contentsline {section}{\numberline {VIII}Threats to validity}{8}{section.8}} -\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. 
The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.}}{8}{figure.9}} -\newlabel{bugmoto}{{9}{8}{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports}{figure.9}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.}}{9}{figure.6}} -\newlabel{similarityhtc}{{6}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity}{figure.6}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.}}{9}{figure.7}} -\newlabel{similaritymoto}{{7}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. 
The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity}{figure.7}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.}}{9}{figure.6}} +\newlabel{similarityhtc}{{6}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity}{figure.6}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.}}{9}{figure.7}} +\newlabel{similaritymoto}{{7}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. 
Brighter means higher Jaccard similarity}{figure.7}{}} \@writefile{toc}{\contentsline {section}{\numberline {IX}Conclusion and Future Work}{9}{section.9}} \bibstyle{IEEEtran} \bibdata{IEEEabrv,msrreference} \bibcite{usmarket}{1} \bibcite{analysis}{2} \bibcite{testing}{3} -\bibcite{labeledlda}{4} -\bibcite{stmt}{5} -\bibcite{Marcus04aninformation}{6} -\bibcite{Asuncion:2010}{7} \@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Common Topics and associated Word List with Related Top 10 Terms}}{10}{table.3}} \newlabel{topicslist}{{III}{10}{Common Topics and associated Word List with Related Top 10 Terms\relax }{table.3}{}} \@writefile{toc}{\contentsline {section}{References}{10}{section*.1}} -\bibcite{Linstead:2009}{8} -\bibcite{Thomas:2011}{9} -\bibcite{Hindle}{10} -\bibcite{MSRChallenge2012}{11} -\bibcite{Hindle2011}{12} -\bibcite{historyofandroid}{13} +\bibcite{historyofandroid}{4} diff --git a/Paper_Latex/bare_conf_CS-beta1.1.dvi b/Paper_Latex/bare_conf_CS-beta1.1.dvi index b9b72da..c45fd77 100644 Binary files a/Paper_Latex/bare_conf_CS-beta1.1.dvi and b/Paper_Latex/bare_conf_CS-beta1.1.dvi differ diff --git a/Paper_Latex/bare_conf_CS-beta1.1.log b/Paper_Latex/bare_conf_CS-beta1.1.log index 5e70c18..61c42d4 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.log +++ b/Paper_Latex/bare_conf_CS-beta1.1.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2011.12.15) 19 APR 2012 00:29 +This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2011.12.15) 19 APR 2012 15:58 entering extended mode **bare_conf_CS-beta1.1.tex (D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.tex @@ -379,113 +379,167 @@ LaTeX Font Info: Try loading font information for U+msb on input line 432. 
("D:\Program Files\miktex\tex\latex\amsfonts\umsb.fd" File: umsb.fd 2009/06/22 v3.00 AMS symbols B ) -Overfull \hbox (1.25832pt too wide) in paragraph at lines 432--432 -[][][][][]$\OT1/ptm/m/n/8 http : / / asymco . com / 2011 / 11 / 17 / the-[]glob -al-[]smartphone-[]market-[]landscape$[]| +Underfull \hbox (badness 4726) in paragraph at lines 432--432 +[][][][]\OT1/ptm/m/n/8 The Global Smart-phone Mar-ket Land-scape: []$http : / / + www . asymco .$ + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 432--432 +$\OT1/ptm/m/n/8 com / 2011 / 11 / 17 / the-[]global-[]smartphone-[]market-[]lan +dscape(retrievedMarch ,[]$ [] Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ™ in font ptmr7t! + +LaTeX Warning: Citation `Marcus04aninformation' on page 1 undefined on input li +ne 436. + + +LaTeX Warning: Citation `Asuncion:2010' on page 1 undefined on input line 436. + + +LaTeX Warning: Citation `Linstead:2009' on page 1 undefined on input line 436. + + +LaTeX Warning: Citation `labeledlda' on page 1 undefined on input line 438. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ™ in font ptmr7t! +[1{C:/Users/Ray/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map} + + +] + +LaTeX Warning: Citation `labeledlda' on page 2 undefined on input line 464. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ™ in font ptmr7t! -[1{C:/Users/Ray/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map} +LaTeX Warning: Citation `stmt' on page 2 undefined on input line 464. -] -Overfull \hbox (19.03108pt too wide) in paragraph at lines 520--546 + +LaTeX Warning: Citation `Marcus04aninformation' on page 2 undefined on input li +ne 476. + + +LaTeX Warning: Citation `Asuncion:2010' on page 2 undefined on input line 476. 
+ + +LaTeX Warning: Citation `Linstead:2009' on page 2 undefined on input line 476. + + +LaTeX Warning: Citation `Thomas:2011' on page 2 undefined on input line 476. + + +LaTeX Warning: Citation `Asuncion:2010' on page 2 undefined on input line 478. + + +LaTeX Warning: Citation `Hindle' on page 2 undefined on input line 482. + + +LaTeX Warning: Citation `MSRChallenge2012' on page 2 undefined on input line 49 +8. + + +Overfull \hbox (19.03108pt too wide) in paragraph at lines 524--550 [][] [] -Underfull \hbox (badness 3417) in paragraph at lines 569--569 +Underfull \hbox (badness 3417) in paragraph at lines 573--573 [][][][]\OT1/ptm/m/n/8 Android Op-er-at-ing Sys-tem sum-mary: []$http : / / en . wikipedia . org / wiki /$ [] -Underfull \hbox (badness 10000) in paragraph at lines 575--575 +Underfull \hbox (badness 10000) in paragraph at lines 579--579 [][][][]\OT1/ptm/m/n/8 Android Com-par-i-son: []$http : / / en . wikipedia . or g / wiki / Comparison[]of[]$ [] + +LaTeX Warning: Citation `Hindle2011' on page 2 undefined on input line 596. + [2] Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ™ in font ptmr7t! -LaTeX Warning: Command \l invalid in math mode on input line 704. +LaTeX Warning: Command \l invalid in math mode on input line 708. + + +LaTeX Warning: Command \l invalid in math mode on input line 710. + + +LaTeX Warning: Command \l invalid in math mode on input line 710. + +LaTeX Warning: Command \l invalid in math mode on input line 713. -LaTeX Warning: Command \l invalid in math mode on input line 706. +LaTeX Warning: Command \l invalid in math mode on input line 713. -LaTeX Warning: Command \l invalid in math mode on input line 706. +LaTeX Warning: Citation `Thomas:2011' on page 3 undefined on input line 761. -LaTeX Warning: Command \l invalid in math mode on input line 709. +LaTeX Warning: Citation `Hindle2011' on page 3 undefined on input line 761. 
-LaTeX Warning: Command \l invalid in math mode on input line 709. + +LaTeX Warning: Citation `Hindle' on page 3 undefined on input line 761. [3] -Overfull \hbox (15.64706pt too wide) in paragraph at lines 830--849 +Overfull \hbox (15.64706pt too wide) in paragraph at lines 834--853 [][] [] - + File: bugovertime.png Graphic file (type png) -Package pdftex.def Info: bugovertime.png used on input line 884. +Package pdftex.def Info: bugovertime.png used on input line 888. (pdftex.def) Requested size: 505.89pt x 227.56071pt. -[4] +[4] File: commontopic.png Graphic file (type png) -Package pdftex.def Info: commontopic.png used on input line 922. +Package pdftex.def Info: commontopic.png used on input line 926. (pdftex.def) Requested size: 505.89pt x 254.29144pt. Underfull \vbox (badness 1831) has occurred while \output is active [] - +Underfull \vbox (badness 10000) has occurred while \output is active [] + + [5 ] + File: fixtopic.png Graphic file (type png) -Package pdftex.def Info: fixtopic.png used on input line 942. +Package pdftex.def Info: fixtopic.png used on input line 946. (pdftex.def) Requested size: 505.89pt x 190.85187pt. -Underfull \vbox (badness 5022) has occurred while \output is active [] - - [5 ] -Underfull \hbox (badness 5817) in paragraph at lines 951--952 +Underfull \hbox (badness 5817) in paragraph at lines 955--956 \OT1/ptm/m/n/10 and Mo-torola share many iden-ti-cal terms for wifi [] - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [6 ] + File: uniquehtc.png Graphic file (type png) - - -Package pdftex.def Info: uniquehtc.png used on input line 977. + +Package pdftex.def Info: uniquehtc.png used on input line 981. (pdftex.def) Requested size: 505.89pt x 70.02682pt. - -File: uniquemoto.png Graphic file (type png) - -Package pdftex.def Info: uniquemoto.png used on input line 984. 
+ +File: uniquemoto.png Graphic file (type png) + +Package pdftex.def Info: uniquemoto.png used on input line 988. (pdftex.def) Requested size: 505.89pt x 70.02682pt. + +[6 ] Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ś in font ptmr7t! @@ -504,101 +558,109 @@ Missing character: There is no Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ť in font ptmr7t! - [7] -File: htcsim.png Graphic file (type png) - -Package pdftex.def Info: htcsim.png used on input line 1076. +Underfull \vbox (badness 4341) has occurred while \output is active [] + + [7 ] + +File: htcsim.png Graphic file (type png) + +Package pdftex.def Info: htcsim.png used on input line 1080. (pdftex.def) Requested size: 505.89pt x 196.72829pt. - -File: motosim.png Graphic file (type png) - -Package pdftex.def Info: motosim.png used on input line 1083. + +File: motosim.png Graphic file (type png) + +Package pdftex.def Info: motosim.png used on input line 1087. (pdftex.def) Requested size: 505.89pt x 198.38048pt. - -File: htcldallda.png Graphic file (type png) - -Package pdftex.def Info: htcldallda.png used on input line 1090. + +File: htcldallda.png Graphic file (type png) + +Package pdftex.def Info: htcldallda.png used on input line 1094. (pdftex.def) Requested size: 252.94499pt x 179.82805pt. -Overfull \hbox (9.03374pt too wide) in paragraph at lines 1090--1091 +Overfull \hbox (9.03374pt too wide) in paragraph at lines 1094--1095 [][] [] - + File: motoldallda.png Graphic file (type png) -Package pdftex.def Info: motoldallda.png used on input line 1097. +Package pdftex.def Info: motoldallda.png used on input line 1101. (pdftex.def) Requested size: 252.94499pt x 170.83348pt. 
-Overfull \hbox (9.03374pt too wide) in paragraph at lines 1097--1098 +Overfull \hbox (9.03374pt too wide) in paragraph at lines 1101--1102 [][] [] Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! + +LaTeX Warning: Citation `MSRChallenge2012' on page 8 undefined on input line 12 +06. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! +[8 ] Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! -[8 ] -Underfull \vbox (badness 10000) has occurred while \output is active [] - [9 ] +Underfull \vbox (badness 10000) has occurred while \output is active [] + + (D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.bbl -Underfull \hbox (badness 10000) in paragraph at lines 30--32 -[]\OT1/ptm/m/n/9 ``An Anal-y-sis of An-droid Frag-men-ta- +Underfull \hbox (badness 10000) in paragraph at lines 25--29 +$\OT1/ptm/m/n/9 08 / 30 / android-[]apple-[]70-[]percent-[]smartphone ,[] retri +evedMarch ,[]$ [] -Underfull \hbox (badness 10000) in paragraph at lines 30--32 -\OT1/ptm/m/n/9 tion,'' []$http : / / www . tech-[]thoughts . net / 2012 / 03 /$ - +Underfull \hbox (badness 7047) in paragraph at lines 31--34 +[]\OT1/ptm/m/n/9 ``An Anal-y-sis of An-droid Frag-men-ta-tion,'' []$http : / / +www .$ [] -Underfull \hbox (badness 10000) in paragraph at lines 34--36 -[]\OT1/ptm/m/n/9 ``Zipline CEO:Stop whin-ing about An- +Underfull \hbox (badness 10000) in paragraph at lines 31--34 +$\OT1/ptm/m/n/9 tech-[]thoughts . 
net / 2012 / 03 / analysis-[]of-[]android-[]f +ragmentation .$ [] -Underfull \hbox (badness 10000) in paragraph at lines 34--36 -\OT1/ptm/m/n/9 droid frag-men-ta-tion and do some damn +Underfull \hbox (badness 10000) in paragraph at lines 36--39 +[]\OT1/ptm/m/n/9 ``Stop whin-ing about An-droid frag-men-ta- [] -Underfull \hbox (badness 10000) in paragraph at lines 34--36 -\OT1/ptm/m/n/9 QA!'' []$http : / / thenextweb . com / google / 2012 / 04 / 02 / -$ +Underfull \hbox (badness 10000) in paragraph at lines 36--39 +\OT1/ptm/m/n/9 tion,'' []$http : / / thenextweb . com / google / 2012 / 04 / 02 + /$ [] -Overfull \hbox (68.26578pt too wide) in paragraph at lines 34--36 +Overfull \hbox (61.76387pt too wide) in paragraph at lines 36--39 $\OT1/ptm/m/n/9 zipline-[]ceo-[]stop-[]whining-[]about-[]android-[]fragmentatio -n-[]and-[]do-[]some-[]damn-[]qa/$[]. +n-[]and-[]do-[]some-[]damn-[]qa ,[]$ [] - -Underfull \vbox (badness 1552) has occurred while \output is active [] - - [10] -Underfull \hbox (badness 10000) in paragraph at lines 92--94 +[10] +Underfull \hbox (badness 10000) in paragraph at lines 41--44 []\OT1/ptm/m/n/9 ``A breif his-tory of An-droid,'' []$http : / /$ [] -Underfull \hbox (badness 10000) in paragraph at lines 92--94 +Underfull \hbox (badness 10000) in paragraph at lines 41--44 $\OT1/ptm/m/n/9 reviews . cnet . com / 8301-[]19736[]7-[]20016542-[]251 /$ [] @@ -614,44 +676,22 @@ Before submitting the final camera ready copy, remember to: uses only Type 1 fonts and that every step in the generation process uses the appropriate paper size. -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1368. +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1374. [11 - -] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 1368. - (D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1368. 
-Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1368. - -Package rerunfilecheck Info: File `bare_conf_CS-beta1.1.out' has not changed. -(rerunfilecheck) Checksum: 229DC3667C93D68FF7223FFCB19CE56C;1515. - - -LaTeX Warning: Label(s) may have changed. Rerun to get cross-references right. - - ) +! pdfTeX error (ext4): \pdfendlink ended up in different nesting level than \pd +fstartlink. +\AtBegShi@Output ...ipout \box \AtBeginShipoutBox + \fi \fi +l.1374 \end{document} + + + Here is how much of TeX's memory you used: - 6575 strings out of 494045 - 97240 string characters out of 3145974 - 208596 words of memory out of 3000000 - 9722 multiletter control sequences out of 15000+200000 - 45422 words of font info for 95 fonts, out of 3000000 for 9000 + 6560 strings out of 494045 + 96936 string characters out of 3145974 + 208285 words of memory out of 3000000 + 9718 multiletter control sequences out of 15000+200000 + 45393 words of font info for 95 fonts, out of 3000000 for 9000 718 hyphenation exceptions out of 8191 - 29i,8n,28p,1181b,457s stack positions out of 5000i,500n,10000p,200000b,50000s -{D:/Program Files/miktex/fonts/enc/dvips/fontname/8r.enc} -Output written on bare_conf_CS-beta1.1.pdf (11 pages, 662577 bytes). -PDF statistics: - 335 PDF objects out of 1000 (max. 8388607) - 65 named destinations out of 1000 (max. 500000) - 222 words of extra memory for PDF output out of 10000 (max. 10000000) - + 29i,8n,28p,1285b,451s stack positions out of 5000i,500n,10000p,200000b,50000s +! ==> Fatal error occurred, no output PDF file produced! 
diff --git a/Paper_Latex/bare_conf_CS-beta1.1.out b/Paper_Latex/bare_conf_CS-beta1.1.out index a380943..2f555d1 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.out +++ b/Paper_Latex/bare_conf_CS-beta1.1.out @@ -14,7 +14,7 @@ \BOOKMARK [2][-]{subsection.5.2}{Topics Analysis of HTC and Motorola}{section.5}% 14 \BOOKMARK [3][-]{subsubsection.5.2.1}{Common Troubled Topic}{subsection.5.2}% 15 \BOOKMARK [3][-]{subsubsection.5.2.2}{Common Improved Topic}{subsection.5.2}% 16 -\BOOKMARK [3][-]{subsubsection.5.2.3}{Unique Topics}{subsection.5.2}% 17 +\BOOKMARK [3][-]{subsubsection.5.2.3}{Unique Topic}{subsection.5.2}% 17 \BOOKMARK [1][-]{section.6}{Discussion of Fragmentation}{}% 18 \BOOKMARK [1][-]{section.7}{Comparing of LDA and Labeled-LDA}{}% 19 \BOOKMARK [1][-]{section.8}{Threats to validity}{}% 20 diff --git a/Paper_Latex/bare_conf_CS-beta1.1.pdf b/Paper_Latex/bare_conf_CS-beta1.1.pdf index a949649..2d521ab 100644 Binary files a/Paper_Latex/bare_conf_CS-beta1.1.pdf and b/Paper_Latex/bare_conf_CS-beta1.1.pdf differ diff --git a/Paper_Latex/bare_conf_CS-beta1.1.synctex.gz b/Paper_Latex/bare_conf_CS-beta1.1.synctex.gz deleted file mode 100644 index e823b9a..0000000 Binary files a/Paper_Latex/bare_conf_CS-beta1.1.synctex.gz and /dev/null differ diff --git a/Paper_Latex/bare_conf_CS-beta1.1.tex b/Paper_Latex/bare_conf_CS-beta1.1.tex index fcf6da3..571c923 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.tex +++ b/Paper_Latex/bare_conf_CS-beta1.1.tex @@ -54,7 +54,7 @@ % Note that the a4paper option is mainly intended so that authors in % countries using A4 can easily print to A4 and see how their papers will -% look in print - the typesetting of the document will not typically be +% look in print - the typesettings of the document will not typically be % affected with changes in paper size (but the bottom and side margins will). % Use the testflow package mentioned above to verify correct handling of % both paper sizes by the user's LaTeX system. 
@@ -361,11 +361,11 @@ % use a multiple column layout for up to two different % affiliations -\author{\IEEEauthorblockN{Dan Han, Chenlei Zhang, Xiaochao Fan, Abram Hindle, Kenny Wong, Eleni Stroulia} +\author{\IEEEauthorblockN{Dan Han, Chenlei Zhang, Xiaochao Fan, Abram Hindle, Kenny Wong and Eleni Stroulia} \IEEEauthorblockA{Department of Computing Science\\ University of Alberta \\ Edmonton, Canada\\ -\{dhan3, chenlei1, xf2, hindle1, kenw, stroulia\}@ualberta.ca} +\{dhan3, chenlei1, xf2, hindle1, kenw, stroulia\}@cs.ualberta.ca} } @@ -405,12 +405,12 @@ \begin{abstract} -Android fragmentation has been a controversial topic, but both proponents and opponents cannot provide strong evidences to support their statements. In order to make the debate more clear, we mined and analyzed the Android bug reports related to two popular Android vendors, HTC and Motorola. We manually annotated bug reports with labels and applied Labeled Latent Dirichlet Allocation (LDA) to the datasets to produce bug topics. By comparing the average relevance of top 20 bug topics over time for both vendors, we categorized the topics into two types which are common topics and unique topics. We investigated and discussed these two types of bug topics relevance tendency over time. Our analysis results lead to the conclusion that Android fragments into multiple incompatible and brand-specific versions. Our findings can be used by Android system community, stakeholders, Android device vendors and developers to make project dashboards, process investigation and feature analysis. +Android fragmentation has been a controversial topic. In this study, we investigated the fragmentation of Android by a comparison of Android vendor's bug reports via topic analysis. We mined and analyzed the Android bug reports related to two popular Android vendors, HTC and Motorola. 
We manually annotated bug reports with labels and applied Labeled Latent Dirichlet Allocation (Labeled-LDA) to the datasets to produce bug topics. By comparing the distribution of the average relevance of the top 18 bug topics over time for both vendors, we categorized the topics into three types: common troubled topics, common improved topics and unique topics. The common troubled topics show that there is no correlation between the troubled features of Android and Android evolution. The common improved topics show that some features within the same vendor have portability issues across its multiple devices. The unique topics show that different vendors have specific bug topics, which implies that there may be portability problems across different vendors with the same Android version. Our findings can be used by the Android system community, stakeholders, Android device vendors and developers for project dashboards, process investigation and feature analysis. \end{abstract} \begin{IEEEkeywords} -Bug reports; Topic mining; Labeled LDA +Bug reports; Topic mining; Labeled-LDA \end{IEEEkeywords} @@ -429,22 +429,26 @@ \section{Introduction} % no \IEEEPARstart -The market share of mobile phones is always increasing and getting more and more competitive among various mobile model vendors\footnote{\url{http://asymco.com/2011/11/17/the-global-smartphone-market-landscape}}. iPhone and Android phone share almost 70\% of US mobile phone market \cite{usmarket}. Compared to Apple’s closed ecosystem for iOS, in general, Android has both software fragmentation and hardware fragmentation\cite{analysis}. Android Software fragmentation includes (1) Customized device-specific Android and UI-specific Android from vendors (2) Customized carrier-specific Android from vendors. 
Hardware fragmentation means that at any given point in time, devices based on the same Android are running on different types of hardware, related to Processors, Graphics Processors, screen size and so on \cite{analysis}. These fragmentation leads to the additional testing work for Android application across multiple devices; it causes Android users are left out of some new feature because of the low upgrade, and it also makes someone lose confidence in Android. +The market share of mobile phones is always changing and getting more and more competitive among various mobile device vendors\footnote{The Global Smartphone Market Landscape: \url{http://www.asymco.com/2011/11/17/the-global-smartphone-market-landscape} (retrieved March, 2012)}. The iPhone and Android phones share almost 70\% of the US mobile phone market \cite{usmarket}. Compared to Apple's closed ecosystem for iOS, in general, Android has both software fragmentation and hardware fragmentation \cite{analysis}. Android software fragmentation includes (1) customized device-specific Android and UI-specific Android from vendors and (2) customized carrier-specific Android from vendors. Hardware fragmentation means that at any given point in time, devices based on the same Android are running on different types of hardware, related to processors, graphics processors, and screen size \cite{analysis}. This fragmentation leads to additional testing work for Android applications across multiple devices; it prevents Android users from experiencing some new features of Android because of upgrade delays; and it also makes people lose confidence in Android. -Android fragmentation has been a controversial topic which swells up now and again regrading its provenance and its impacts. However, no one can provide strong evidences to support their statements. 
Someone from industry performed experiments of Android on different devices and found that the root cause of fragmentation is the classical software engineering issues\cite{testing}. +Android fragmentation has been a controversial topic which swells up now and again regarding its provenance and its impacts. However, no one has provided strong evidence to support their statements. Someone from industry performed experiments with Android on different devices and found that the root cause of fragmentation lies in classical software engineering issues \cite{testing}. -In our study, we want to explore this problem by mining and analyzing Android user bug reports. We applied Labeled LDA\cite{labeledlda}. on bug reports of different vendors to analyze the bug topics. We then did the analysis on the bug topics and based on the topics analysis, and we discussed what features of Android contributed much on Android fragmentation in the end. In terms of bug reports, We chose the bug reports of HTC and Motorola in this study. HTC’s first Android phone was the HTC Dream manufactured in Oct. 2008. HTC has made more than thirty different Android phones since then. Motorola made their first Android phone in Oct. 2009 and has released more than twenty different Android phones since then. Their Android products have gained widespread popularity. +In this paper, we applied topic analysis to the Android bug reports. The topics of a document (e.g. bug reports, source code changes and commits) are generated by topic models, which have been used to help understand software systems. There are a few topic models utilized by researchers in software engineering, e.g. Latent Dirichlet Allocation (LDA), Latent Semantic Indexing (LSI) and Labeled Latent Dirichlet Allocation (Labeled-LDA) \cite{Marcus04aninformation} \cite{Asuncion:2010} \cite{Linstead:2009}. + +In our study, we want to explore the fragmentation of Android by mining and analyzing Android user bug reports. 
We applied labeled-LDA\cite{labeledlda} on bug reports of different vendors and analyzed topics in bug reports. We then did the analysis on the bug topics and based on the topics analysis, we discussed what features of Android contributed much on Android fragmentation in the end. + +In terms of bug reports, we chose the bug reports of HTC and Motorola in this study. HTC’s first Android phone was the HTC Dream manufactured in Oct. 2008. HTC has made more than thirty different Android phones since then. Motorola made their first Android phone in Oct. 2009 and has released more than twenty different Android phones since then. Their Android products have gained widespread popularity. -This paper makes the following contribution: +This paper makes the following contributions: \begin{itemize} -\item we found that some features of Android contribute software fragmentation and some features contribute hardware fragmentation. +\item we found that some features of Android contribute to both software fragmentation and hardware fragmentation. \end{itemize} \begin{itemize} -\item We provided a methodology which can be used to analyze other Android branches fragmentation issues to discover more hardware fragmentation. +\item We provided a methodology which can be used to analyze other Android branches' fragmentation. \end{itemize} -The paper is organized as follows: Section 2 describes the background; we discuss the related work in section 3; in section 4, we explicate our methodology about the mining approaches applied on this study; section 5 is to compare and evaluate the topic models generated by LDA and Labeled LDA; we introduce the analysis of topic evolution models in section 6; the paper concludes with a discussion of two research questions, threats to validity, conclusion and future work in section 7, 8 and 9 correspondingly. 
+The paper is organized as follows: Section 2 describes the background; we discuss the related work in section 3; in section 4, we introduce our methodology; we apply the analysis of topic evolution models in section 5; section 6 compares and evaluates the topic models generated by LDA and labeled-LDA; the paper concludes with a discussion of two research questions, threats to validity, conclusion and future work in section 7, 8 and 9 correspondingly. % You must have at least 2 lines in the paragraph with the drop letter % (should never be an issue) @@ -457,7 +461,7 @@ \section{Introduction} %Subsubsection text here. \section{Background} -In our research, we apply Labeled LDA to perform topic analysis. Labeled LDA is a supervised topic model for credit attribution in multi-labeled corpora\cite{labeledlda}. It defines a one-to-one mapping between LDA’s latent topics and tags labeled by users. In other words, Labeled LDA incorporates the multiple tags into the topics learning process and only builds topics around these tags, which is quite different from LDA. LDA, as a totally unsupervised algorithm, automatically learns a set of terms for each topic on a corpus without any constraints. To apply Labeled LDA, we utilize the Stanford Topic Modeling Toolbox (STMT)\cite{stmt}. Specifically, this tool outputs a set of topics, each one consisting of a list of terms, and the relevance distribution between each bug report and all the topics. +In our research, we apply labeled-LDA to perform topic analysis. labeled-LDA is a supervised topic model for credit attribution in multi-labeled corpora\cite{labeledlda}. It defines a one-to-one mapping between LDA’s latent topics and tags labeled by users. In other words, labeled-LDA incorporates the multiple tags into the topics learning process and only builds topics around these tags, which is quite different from LDA. 
LDA, as a totally unsupervised algorithm, automatically learns a set of terms for each topic on a corpus without any constraints. To apply labeled-LDA, we utilize the Stanford Topic Modeling Toolbox (STMT)\cite{stmt}. %\begin{figure*}[htb] @@ -471,11 +475,11 @@ \section{Background} \section{Related Work} Topic models have been used to help understand software systems. Marcus et al.\cite{Marcus04aninformation} used Latent Semantic Indexing (LSI) on both source code and user queries and then identified the most relevant source code documents with similarity measurements. Asuncion et al.\cite{Asuncion:2010} applied a coherence measurement on topics learned by LDA to model the quality of bug reports. Linstead et al.\cite{Linstead:2009} performed LDA to generate traceability links for artifacts in software projects automatically. Topic modeling is also utilized by Thomas et al.\cite{Thomas:2011} to study the evolution of topics in software projects. -Compared with all these approaches, our work differs from them in two aspects. We manually labeled bug reports with multiple labels. And we applied labeled LDA in our work to overcome the disadvantages of these unsupervised algorithms by pre-defining the number of topics and interpreting the extracted topics \cite{Asuncion:2010}. +Compared with all these approaches, our work differs from them in two aspects. We manually labeled bug reports with multiple labels. And we applied Labeled-LDA in our work to overcome the disadvantages of these unsupervised algorithms by pre-defining the number of topics and interpreting the extracted topics \cite{Asuncion:2010}. \section{Methodology} -Our methodology is to extract bug reports, assign multiple labels to each of them and then apply Labeled LDA on the labeled data. After that we calculate the average relevance of bug reports to each label over time\cite{Hindle} and compare them between two Android vendors, HTC and Motorola. 
In order to compare the performance between LDA and Labeled LDA, we also apply LDA on the extracted bug reports of HTC and Motorola without our manual labels. We label all the topics generated by LDA. For each vendor, we calculate the similarity of each pair of labels from LDA and Labeled LDA to evaluate their performance. +Our methodology is to extract bug reports, assign multiple labels to each of them and then apply labeled-LDA on the labeled data. After that we calculate the average relevance of bug reports to each label over time\cite{Hindle} and compare them between two Android vendors, HTC and Motorola. In order to compare the performance between LDA and labeled-LDA, we also apply LDA on the extracted bug reports of HTC and Motorola without our manual labels. We label all the topics generated by LDA. For each vendor, we calculate the similarity of each pair of labels from LDA and labeled-LDA to evaluate their performance. \subsection{Generating the data} @@ -523,7 +527,7 @@ \subsection{Generating the data} \hline HTC & sms\//mms calling email contact video time network system\\ & android\_market display browser bluetooth audio memory input\\ - & notification image SIM\_card setting layout app upgrade\\ + & notification image SIM\_card settings layout app upgrade\\ & wifi google\_map keyboard calendar alarm language car search\\ & dialing USB touchscreen CPU gtalk voicedialing signal\\ & google\_voice ringtone google\_navigation location font\\ @@ -533,7 +537,7 @@ \subsection{Generating the data} & synchronize voicemail voice\_recognition facebook flash\\ & google\_latitude GPS camera youtube\\ \hline -Motorola & calling network setting gtalk calendar signal contact\\ +Motorola & calling network settings gtalk calendar signal contact\\ & android\_market input camera image app wifi keyboard system\\ & layout sms\//mms bluetooth display browser email notification\\ & alarm audio multimedia\_dock car SD\_card screen text lock\\ @@ -657,11 +661,11 @@ 
\subsection{Labelling the HTC and Motorola Bug Reports} Table \ref{selected1} lists all the manual labels from bug reports of HTC and Motorola. -% XXX TODO Labeled LDA to Labeled-LDA +% XXX TODO labeled-LDA to Labeled-LDA \subsection{Applying Labeled-LDA} -% 4. Apply Labeled LDA:We applied the Labeled-LDA tool, Stanford Topic +% 4. Apply labeled-LDA:We applied the Labeled-LDA tool, Stanford Topic % Modeling Toobox [http://nlp.stanford.edu/software/tmt/tmt-0.4/], to % get the topic-document distribution on our labeled bug reports. @@ -688,7 +692,7 @@ \subsection{Applying Labeled-LDA} with our label, as well as a document-topic matrix which links our labels to the documents in the each bug report corpus (HTC and Motorola). -%By applying Labeled LDA to the bug reports of HTC and Motorola +%By applying labeled-LDA to the bug reports of HTC and Motorola %separately, we have the word distribution of each label and a matrix %that provides the relationship between bug reports and the labels. @@ -767,10 +771,10 @@ \subsection{Applying LDA} \subsection{Comparing the Effort to Use LDA and Labeled-LDA} -% 8. Comparison of LDA and Labeled LDA: For each pair of topics in LDA -% and Labeled LDA, we computed the their similarity based on the +% 8. Comparison of LDA and labeled-LDA: For each pair of topics in LDA +% and labeled-LDA, we computed the their similarity based on the % topic-document distribution. That is the Jaccard similarity of the two -% sets. One is from LDA and the other one is from Labeled LDA. Each set +% sets. One is from LDA and the other one is from labeled-LDA. Each set % is the bug reports that have relevance to that label. We chose several % thresholds on the relevance. That is if the relevance is under the % threshold, this bug report is not related to that label. At last we @@ -781,11 +785,11 @@ \subsection{Comparing the Effort to Use LDA and Labeled-LDA} Labeled-LDA we had to compare the results. 
% Thus nce LDA and Labelled-LDA were applied to the bug reports of HTC and % Motorola we had to compare the topics that were extracted. -Both LDA and Labeled LDA produce matrices of +Both LDA and labeled-LDA produce matrices of the relationship between bug reports of two vendors and the label or topics. That is if the topics generated by LDA that were labeled as the same -ones in Labeled LDA would be related to similar bug reports. +ones in labeled-LDA would be related to similar bug reports. We determined topic similarity by comparing the sets of documents relevant to a LDA topic and those relevant to a Labeled-LDA @@ -793,9 +797,9 @@ \subsection{Comparing the Effort to Use LDA and Labeled-LDA} topic we did pair-wise similarity comparisons. We applied the Jaccard similarity coefficient to compute the -similarity between each topic in LDA and each label in Labeled LDA. +similarity between each topic in LDA and each label in labeled-LDA. That is, the Jaccard similarity coefficient between label A in LDA and -label B in Labeled LDA is the ratio of the intersection of bug reports +label B in labeled-LDA is the ratio of the intersection of bug reports related to label A and label B to the union of the bug reports related to label A and label B, \begin{equation} @@ -858,25 +862,25 @@ \section{Topic Mining and Analysis} %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{htcldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. 
The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{motoldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{htcratiosim.png} -%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{motoratiosim.png} -%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. 
The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} \begin{figure*}[htb] @@ -929,9 +933,9 @@ \subsubsection{Common Troubled Topic} %relevance shoule be written as the distribution of the average relevance Eight Common Troubled Topics shared by two vendors are shown in Table \ref{topicslist} and the distribution of average relevance of each topic is shown in Figure \ref{commontopic}. %all the labels should be consistent with all the labels in table I. -In Table \ref{topicslist}, the topics in HTC and Motorola share many identical terms. That means they have the same issues about sms\/mms(\textit{text, thread, send}), calendar(\textit{event, day, google,appointment,time}), email(\textit{gmail, send, thread}), contact (\textit{number, google,list}), display (\textit{screen,button,behavior}), bluetooth (\textit{headset,connect, calling}), synchronize (\textit{contact, exchange, google}) and setting(\textit{turn,network,mode}). +In Table \ref{topicslist}, the topics in HTC and Motorola share many identical terms. That means they have the same issues about sms\/mms(\textit{text, thread, send}), calendar(\textit{event, day, google,appointment,time}), email(\textit{gmail, send, thread}), contact (\textit{number, google,list}), display (\textit{screen,button,behavior}), bluetooth (\textit{headset,connect, calling}), synchronize (\textit{contact, exchange, google}) and settings(\textit{turn,network,mode}). -We also found that multiple topics share some same terms for each vendor. For HTC, we can see, five topics including sms/mms, contact, display, bluetooth and setting share the same term ``desire". This indicates that these topics happened frequently in HTC Desire model. Calendar and bluetooth share the same term ``2.2" and ``2.2" means Android version 2.2. This indicates that these two topics happened frequently for Android 2.2 in HTC models. For Motorola, seven topics except setting share the same term ``droid" and it means Motorola Droid model. 
In addition, calendar and synchronize in Motorola share ``milestone" which indicates these two topics discussed mostly in Motorola Milestone model. ``Xoom" shared by display and setting indicate that Motorola Xoom has more bug reports related with these two topics. Furthermore, synchronize associates with both ``Xoom" and ``milestone" terms. This indicates bugs related with synchronization happened frequently in both Motorola Xoom and Motorola Milestone. +We also found that multiple topics share some same terms for each vendor. For HTC, we can see, five topics including sms/mms, contact, display, bluetooth and settings share the same term ``desire". This indicates that these topics happened frequently in HTC Desire model. Calendar and bluetooth share the same term ``2.2" and ``2.2" means Android version 2.2. This indicates that these two topics happened frequently for Android 2.2 in HTC models. For Motorola, seven topics except settings share the same term ``droid" and it means Motorola Droid model. In addition, calendar and synchronize in Motorola share ``milestone" which indicates these two topics discussed mostly in Motorola Milestone model. ``Xoom" shared by display and settings indicate that Motorola Xoom has more bug reports related with these two topics. Furthermore, synchronize associates with both ``Xoom" and ``milestone" terms. This indicates bugs related with synchronization happened frequently in both Motorola Xoom and Motorola Milestone. In Figure \ref{commontopic}, HTC and Motorola share the same trends of the distribution of average relevance of topics. Both of them have continuous spikes and drops for each topic over time. That indicates bugs associated with these topics have no obvious decreasing trends with Android evolution. %more explain why android version should make the trend of topics decrease in the ideal case. 
@@ -987,7 +991,7 @@ \subsubsection{Common Improved Topic} \end{figure*} -\subsubsection{Unique Topics} +\subsubsection{Unique Topic} There are the two unique topics for HTC shown in Table \ref{topicslist}. Figure \ref{uniquehtc} shows the distribution of the average relevance of each topic. @@ -1005,7 +1009,7 @@ \subsubsection{Unique Topics} \section{Discussion of Fragmentation} %What is feature evolution? -According to the analysis about Common Troubled Topics, we can see that there is no strong correlation between the feature evolution and Android evolution. In addition, The topic - upgrade has strong correlation with Android 2.1 and Android 2.2. As there are some features evolution demonstrate stable trends with Android evolution implicated by the Common Improved Topics, we can conclude that Android has compatibility issue in some features. +According to the analysis about Common Troubled Topics, we can see that there is no strong correlation between the feature evolution and Android evolution. In addition, The topic upgrade has strong correlation with Android 2.1 and Android 2.2. As there are some features evolution demonstrate stable trends with Android evolution implicated by the Common Improved Topics, we can conclude that Android has compatibility issue in some features. From Common Improved Topics and Unique Topics, we can see the same topic from different vendors have different correlation, and they have strong correlation with some specific vendors' models. These observations reveal that Android has portability issue in some features. @@ -1015,25 +1019,25 @@ \section{Discussion of Fragmentation} %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{htcldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. 
The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{motoldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{htcratiosim.png} -%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{motoratiosim.png} -%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in Motorola. 
The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} @@ -1074,47 +1078,47 @@ \section{Comparing of LDA and Labeled-LDA} \begin{figure*}[htb] \centering \includegraphics[width=1\textwidth]{htcsim.png} -\caption{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.} +\caption{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.} \label{similarityhtc} \end{figure*} \begin{figure*}[htb] \centering \includegraphics[width=1\textwidth]{motosim.png} -\caption{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.} +\caption{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. 
Brighter means higher Jaccard similarity.} \label{similaritymoto} \end{figure*} \begin{figure}[htb] \centering \includegraphics[width=0.5\textwidth]{htcldallda.png} -\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} \label{bughtc} \end{figure} \begin{figure}[!htb] \centering \includegraphics[width=0.5\textwidth]{motoldallda.png} -\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} \label{bugmoto} \end{figure} -%Most of the same labels from LDA and Labeled LDA have the comparable amount of bug reports. For example, the label ``calling'' from the HTC bug reports has exactly the same number of bugs related to for both results of LDA and Labeled LDA. However, the similarity of these related bug reports in terms of this label ``calling'' is very low which means LDA and Labeled LDA related quite different bug reports to this label. When doing this comparison, we cannot ignore the number of bugs that related to each label from both two techniques. That is, for one label, the ratio (the smaller number is divided by the bigger number so the ratio is always less or equal to one) of the number of bug reports related to this label predicted by LDA to that of Labeled LDA would be the upper bound of the similarity value. 
From Figure \ref{bughtc} and Figure \ref{bugmoto} that the relation between topics and each bug report modeled by LDA is quite different from the results generated by Labeled LDA. +%Most of the same labels from LDA and labeled-LDA have the comparable amount of bug reports. For example, the label ``calling'' from the HTC bug reports has exactly the same number of bugs related to for both results of LDA and labeled-LDA. However, the similarity of these related bug reports in terms of this label ``calling'' is very low which means LDA and labeled-LDA related quite different bug reports to this label. When doing this comparison, we cannot ignore the number of bugs that related to each label from both two techniques. That is, for one label, the ratio (the smaller number is divided by the bigger number so the ratio is always less or equal to one) of the number of bug reports related to this label predicted by LDA to that of labeled-LDA would be the upper bound of the similarity value. From Figure \ref{bughtc} and Figure \ref{bugmoto} that the relation between topics and each bug report modeled by LDA is quite different from the results generated by labeled-LDA. %The similarity values for these labels in Figure \ref{similaritymoto} are quite low compared with the ratio. Only about ten labels in HTC have similarity values that are larger than half of the ratio. For Motorola, the similarity values are all very low compared with the upper bound of the similarity values. -We can conclude that only few of the bug reports in HTC and Motorola are predicted by LDA and Labeled LDA to be related to the same labels. In other words, the relation between topics and each bug report modeled by LDA is quite different from the results generated by Labeled-LDA. We think the manual efforts of labeling all the bug reports would help us gain the better topic models generated by Labeled-LDA. 
+We can conclude that only few of the bug reports in HTC and Motorola are predicted by LDA and labeled-LDA to be related to the same labels. In other words, the relation between topics and each bug report modeled by LDA is quite different from the results generated by Labeled-LDA. We think the manual efforts of labeling all the bug reports would help us gain the better topic models generated by Labeled-LDA. %\begin{figure}[htb] %\centering %\includegraphics[width=0.5\textwidth]{htcratiosim.png} -%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} %\begin{figure}[htb] %\centering %\includegraphics[width=0.5\textwidth]{motoratiosim.png} -%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. 
The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} @@ -1158,7 +1162,7 @@ \section{Comparing of LDA and Labeled-LDA} % &&Google, ears, device, group, server, &contact, group, time, exchange, contactsq"e, \\ &&Gmail, policy, new, list, display&display, groups, list, droid, milestone\\ \cline{2-4} - & Setting&Volume, sound, set, pattern, default,&Settings, device, menu, turn, network, \\ + & settings&Volume, sound, set, pattern, default,&settings, device, menu, turn, network, \\ % && Turn, desire, static, control, apps,&vpn, honeycomb, button, xoom, settings-q, \\ && change, settings, media, dns, screen &behavior, right, wireless, headset, mode\\ \hline @@ -1210,7 +1214,9 @@ \section{Threats to validity} \section{Conclusion and Future Work} - In this paper we studied the Android bug reports for two Android vendors, HTC and Motorola. Based on topic analysis using Labeled LDA on a corpus of manually tagged bug reports with multiple labels, we extracted top 18 topics and categorized them into common troubled topics, common improved topics for both vendors and unique topics for each vendor. The common troubled topics tell us that the evolution of some features in Android have no positive correlation with Android evolution. The Common Improved Topics uncover that some features within the same vendor have portability issues across multiple models. All these topics show that the corresponding Android features have high correlation with the models from different vendors. After the discussion with the topics analysis, we made a conclusion that Android has both software fragmentation and hardware fragmentation. Compared with other features in Android, these 18 features implicated by these topics have more impact on its fragmentation. +In this paper we studied Android bug reports for two vendors, HTC and Motorola. 
Based on topic analysis using Labeled-LDA on a corpus of manually tagged bug reports with multiple labels, we extracted the top 18 topics and categorized them into common troubled topics, common improved topics and unique topics for both vendors. The common troubled topics show that there is no correlation between the troubled features of Android and Android evolution. In other words, there may be an incompatibility problem affecting specific features of Android. The common improved topics show that some features within the same vendor have portability issues across their multiple devices. The unique topics show that different vendors have specific bug topics, which implies there may be a portability problem across different vendors with the same Android version. Furthermore, after comparing LDA and Labeled-LDA, we found that the manual effort of labeling all the bug reports would help us gain better topic models generated by Labeled-LDA. + +For our future work, we will use the name of each hardware model as a label to do topic analysis while applying our methodology in order to discover the effects of different Android versions with respect to compatibility and stability. We also plan to investigate more vendors in order to reveal vendor-specific bug topics. %\subsubsection{Multi-labeling} @@ -1220,25 +1226,25 @@ \section{Conclusion and Future Work} %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{htcldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. 
The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{motoldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{htcratiosim.png} -%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{motoratiosim.png} -%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} % An example of a floating figure using the graphicx package. 
diff --git a/Paper_Latex/msrreference.bib b/Paper_Latex/msrreference.bib index 51f2608..c24117e 100644 --- a/Paper_Latex/msrreference.bib +++ b/Paper_Latex/msrreference.bib @@ -1,53 +1,50 @@ @Misc{usmarket, title = {{Android And Apple Now Command Nearly 70 Percent Of U.S. Smartphone Market Share}}, -note = "\url{http://techcrunch.com/2011/08/30/android-apple-70-percent-smartphone}", +note = "\url{http://techcrunch.com/2011/08/30/android-apple-70-percent-smartphone, retrieved March, 2012}", } @Misc{analysis, title = {{An Analysis of Android Fragmentation}}, -note = "\url{http://www.tech-thoughts.net/2012/03/analysis-of-android-fragmentation.html}", +note = "\url{http://www.tech-thoughts.net/2012/03/analysis-of-android-fragmentation.html, retrieved March, 2012}", } @Misc{testing, -title = {{Zipline CEO:Stop whining about Android fragmentation and do some damn QA!}}, -note = "\url{http://thenextweb.com/google/2012/04/02/zipline-ceo-stop-whining-about-android-fragmentation-and-do-some-damn-qa/}", +title = {{Stop whining about Android fragmentation}}, +note = "\url{http://thenextweb.com/google/2012/04/02/zipline-ceo-stop-whining-about-android-fragmentation-and-do-some-damn-qa, retrieved March, 2012}", } - - @Misc{historyofandroid, title = {{A breif history of Android}}, -note = "\url{http://reviews.cnet.com/8301-19736_7-20016542-251/a-brief-history-of-android-phones}", +note = "\url{http://reviews.cnet.com/8301-19736_7-20016542-251/a-brief-history-of-android-phones , retrieved March, 2012}", } @Misc{androidwebsite, title = {{Android website:}}, -note = "\url{http://developer.android.com/sdk/index.html}", +note = "\url{http://developer.android.com/sdk/index.html, retrieved March, 2012}", } @Misc{androidfragmentation, title = {{The pros and cons of Android fragmentation:}}, -note = "\url{http://mashable.com/2012/01/03/android-fragmentation}", -} +note = "\url{http://mashable.com/2012/01/03/android-fragmentation, retrieved March, 2012"} @Misc{wifiissue, title = {{HTC WiFi 
issue:}}, -note = "\url{http://www.linuxfordevices.com/c/a/News/HTC-fixes-WiFi-vulnerability-on-nine-phones}", +note = "\url{http://www.linuxfordevices.com/c/a/News/HTC-fixes-WiFi-vulnerability-on-nine-phones, retrieved March, 2012}", } @Misc{droid, title = {{Motorola Droid:}}, -note = "\url{http://en.wikipedia.org/wiki/Motorola_Droid}", +note = "\url{http://en.wikipedia.org/wiki/Motorola_Droid, retrieved March, 2012}", } @Misc{Synchronizationissue, title = {{Synchronization issue:}}, -note = "\url{http://androidadvices.com/backup-android-phone-contacts-sms-computer}", +note = "\url{http://androidadvices.com/backup-android-phone-contacts-sms-computer, retrieved March, 2012}", } @Misc{timetoendfragmentation, title = {{Google Android: It's time to end the fragmentation:}}, -note = "\url{http://www.zdnet.com/blog/gadgetreviews/google-android-its-time-to-end-the-fragmentation/13279}", +note = "\url{http://www.zdnet.com/blog/gadgetreviews/google-android-its-time-to-end-the-fragmentation/13279, retrieved March, 2012}", } @Misc{androidcompatibility, title = {{On Android Compatibility:}}, -note = "\url{http://android-developers.blogspot.ca/2010/05/on-android-compatibility.html}", +note = "\url{http://android-developers.blogspot.ca/2010/05/on-android-compatibility.html, retrieved March, 2012}", } @InProceedings{labeledlda, @@ -62,8 +59,7 @@ @InProceedings{labeledlda } @Misc{stmt, title = {{Stanford Topic Modeling Toolbox:}}, -howpublished = {Website}, -note = "\url{http://nlp.stanford.edu/software/tmt/tmt-0.4}", +note = "\url{http://nlp.stanford.edu/software/tmt/tmt-0.4, retrieved March, 2012}", } @INPROCEEDINGS{Marcus04aninformation, author = {Andrian Marcus and Andrey Sergeyev and Václav Rajlich and Jonathan I. 
Maletic}, @@ -163,15 +159,8 @@ @inproceedings{Hindle2011 booktitle = {Proceedings of the 8th Working Conference on Mining Software Repositories}, series = {MSR '11}, year = {2011}, - isbn = {978-1-4503-0574-7}, location = {Waikiki, Honolulu, HI, USA}, pages = {163--172}, - numpages = {10}, - sillyurl = {http://doi.acm.org/10.1145/1985441.1985466}, - doi = {10.1145/1985441.1985466}, - acmid = {1985466}, publisher = {ACM}, address = {New York, NY, USA}, - oldnum = 9, - keywords = {lda, non-functional requirements, topic analysis} }