diff --git a/Paper_Latex/bare_conf_CS-beta1.1.aux b/Paper_Latex/bare_conf_CS-beta1.1.aux index bb78e1c..8823dd9 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.aux +++ b/Paper_Latex/bare_conf_CS-beta1.1.aux @@ -18,11 +18,17 @@ \citation{analysis} \citation{analysis} \citation{testing} +\citation{Marcus04aninformation} +\citation{Asuncion:2010} +\citation{Linstead:2009} \citation{labeledlda} +\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}} +\citation{hindle9s} +\citation{li2003topic} +\citation{li2003topic} +\citation{ldawiki} \citation{labeledlda} \citation{stmt} -\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}} -\@writefile{toc}{\contentsline {section}{\numberline {II}Background}{1}{section.2}} \citation{Marcus04aninformation} \citation{Asuncion:2010} \citation{Linstead:2009} @@ -30,69 +36,56 @@ \citation{Asuncion:2010} \citation{Hindle} \citation{MSRChallenge2012} -\citation{Hindle2011} +\@writefile{toc}{\contentsline {section}{\numberline {II}Background}{2}{section.2}} \@writefile{toc}{\contentsline {section}{\numberline {III}Related Work}{2}{section.3}} -\@writefile{toc}{\contentsline {section}{\numberline {IV}Methodology}{2}{section.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}Generating the data}{2}{subsection.4.1}} \@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Manual labels from bug reports of HTC and Motorola.}}{2}{table.1}} \newlabel{selected1}{{I}{2}{Manual labels from bug reports of HTC and Motorola}{table.1}{}} +\@writefile{toc}{\contentsline {section}{\numberline {IV}Methodology}{2}{section.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}Generating the data}{2}{subsection.4.1}} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-B}}Research Features as Potential Labels}{2}{subsection.4.2}} -\@writefile{toc}{\contentsline 
{subsection}{\numberline {\unhbox \voidb@x \hbox {IV-C}}Developing Labels}{2}{subsection.4.3}} +\citation{Hindle2011} \citation{Thomas:2011} \citation{Hindle2011} \citation{Hindle} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-C}}Developing Labels}{3}{subsection.4.3}} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-D}}Labelling the HTC and Motorola Bug Reports}{3}{subsection.4.4}} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-E}}Applying Labeled-LDA}{3}{subsection.4.5}} \newlabel{equation1}{{1}{3}{Applying Labeled-LDA\relax }{equation.4.1}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-F}}Applying LDA}{3}{subsection.4.6}} \citation{historyofandroid} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-G}}Comparing the Effort to Use LDA and Labeled-LDA}{4}{subsection.4.7}} -\@writefile{toc}{\contentsline {section}{\numberline {V}Topic Mining and Analysis}{4}{section.5}} \@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Selected topics from LDA with manual labels. Word lists are inferred by LDA.}}{4}{table.2}} \newlabel{seleted2}{{II}{4}{Selected topics from LDA with manual labels. 
Word lists are inferred by LDA}{table.2}{}} +\@writefile{toc}{\contentsline {section}{\numberline {V}Topic Mining and Analysis}{4}{section.5}} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-A}}Overview of bug reports in HTC and Motorola}{4}{subsection.5.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-B}}Topics Analysis of HTC and Motorola}{4}{subsection.5.2}} -\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Number of bugs with the major version of Android for HTC and Motorola}}{5}{figure.1}} -\newlabel{bugovertime}{{1}{5}{Number of bugs with the major version of Android for HTC and Motorola\relax }{figure.1}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Number of bug reports with the major version of Android for HTC and Motorola}}{5}{figure.1}} +\newlabel{bugovertime}{{1}{5}{Number of bug reports with the major version of Android for HTC and Motorola\relax }{figure.1}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-B}}Topics Analysis of HTC and Motorola}{5}{subsection.5.2}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}1}Common Troubled Topic}{5}{subsubsection.5.2.1}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}2}Common Improved Topic}{5}{subsubsection.5.2.2}} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Common Troubled Topics in HTC and Motorola}}{6}{figure.2}} \newlabel{commontopic}{{2}{6}{Common Troubled Topics in HTC and Motorola\relax }{figure.2}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Common Improved Topics in HTC and Motorola}}{6}{figure.3}} -\newlabel{fixtopic}{{3}{6}{Common Improved Topics in HTC and Motorola\relax }{figure.3}{}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}2}Common Improved 
Topic}{6}{subsubsection.5.2.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Common Improved Topics in HTC and Motorola}}{7}{figure.3}} +\newlabel{fixtopic}{{3}{7}{Common Improved Topics in HTC and Motorola\relax }{figure.3}{}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {V-B}3}Unique Topics}{7}{subsubsection.5.2.3}} \@writefile{toc}{\contentsline {section}{\numberline {VI}Discussion of Fragmentation}{7}{section.6}} -\@writefile{toc}{\contentsline {section}{\numberline {VII}Comparing of LDA and Labeled-LDA}{7}{section.7}} \citation{MSRChallenge2012} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Unique Topics relevance in HTC}}{8}{figure.4}} \newlabel{uniquehtc}{{4}{8}{Unique Topics relevance in HTC\relax }{figure.4}{}} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Unique Topics relevance in Motorola}}{8}{figure.5}} \newlabel{uniquemoto}{{5}{8}{Unique Topics relevance in Motorola\relax }{figure.5}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.}}{8}{figure.8}} -\newlabel{bughtc}{{8}{8}{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports}{figure.8}{}} +\@writefile{toc}{\contentsline {section}{\numberline {VII}Comparing of LDA and Labeled-LDA}{8}{section.7}} +\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. 
The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.}}{8}{figure.8}} +\newlabel{bughtc}{{8}{8}{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports}{figure.8}{}} \@writefile{toc}{\contentsline {section}{\numberline {VIII}Threats to validity}{8}{section.8}} -\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.}}{8}{figure.9}} -\newlabel{bugmoto}{{9}{8}{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports}{figure.9}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.}}{9}{figure.6}} -\newlabel{similarityhtc}{{6}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity}{figure.6}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. 
X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.}}{9}{figure.7}} -\newlabel{similaritymoto}{{7}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity}{figure.7}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.}}{9}{figure.6}} +\newlabel{similarityhtc}{{6}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity}{figure.6}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. 
Brighter means higher Jaccard similarity.}}{9}{figure.7}} +\newlabel{similaritymoto}{{7}{9}{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity}{figure.7}{}} \@writefile{toc}{\contentsline {section}{\numberline {IX}Conclusion and Future Work}{9}{section.9}} \bibstyle{IEEEtran} \bibdata{IEEEabrv,msrreference} -\bibcite{usmarket}{1} -\bibcite{analysis}{2} -\bibcite{testing}{3} -\bibcite{labeledlda}{4} -\bibcite{stmt}{5} -\bibcite{Marcus04aninformation}{6} -\bibcite{Asuncion:2010}{7} -\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Common Topics and associated Word List with Related Top 10 Terms}}{10}{table.3}} -\newlabel{topicslist}{{III}{10}{Common Topics and associated Word List with Related Top 10 Terms\relax }{table.3}{}} -\@writefile{toc}{\contentsline {section}{References}{10}{section*.1}} -\bibcite{Linstead:2009}{8} -\bibcite{Thomas:2011}{9} -\bibcite{Hindle}{10} -\bibcite{MSRChallenge2012}{11} -\bibcite{Hindle2011}{12} -\bibcite{historyofandroid}{13} +\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.}}{10}{figure.9}} +\newlabel{bugmoto}{{9}{10}{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. 
The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports}{figure.9}{}} +\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Topics and associated Word List with Related Top 15 Terms}}{11}{table.3}} +\newlabel{topicslist}{{III}{11}{Topics and associated Word List with Related Top 15 Terms\relax }{table.3}{}} diff --git a/Paper_Latex/bare_conf_CS-beta1.1.log b/Paper_Latex/bare_conf_CS-beta1.1.log index 5e70c18..258ef1d 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.log +++ b/Paper_Latex/bare_conf_CS-beta1.1.log @@ -1,7 +1,7 @@ -This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2011.12.15) 19 APR 2012 00:29 +This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2011.12.15) 19 APR 2012 17:17 entering extended mode **bare_conf_CS-beta1.1.tex -(D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.tex +(D:\git\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.tex LaTeX2e <2011/06/27> Babel and hyphenation patterns for english, afrikaans, ancientgreek, ar abic, armenian, assamese, basque, bengali, bokmal, bulgarian, catalan, coptic, @@ -13,7 +13,7 @@ an, ngerman-x-2009-06-19, nynorsk, oriya, panjabi, pinyin, polish, portuguese, romanian, russian, sanskrit, serbian, slovak, slovenian, spanish, swedish, swis sgerman, tamil, telugu, turkish, turkmen, ukenglish, ukrainian, uppersorbian, u senglishmax, welsh, loaded. -(D:\github\fragment_android\Paper_Latex\IEEEtran.cls +(D:\git\fragment_android\Paper_Latex\IEEEtran.cls Document Class: IEEEtran 2007/03/05 V1.7a by Michael Shell -- See the "IEEEtran_HOWTO" manual for usage information. 
-- http://www.michaelshell.org/tex/ieeetran/ @@ -312,7 +312,7 @@ Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 ("D:\Program Files\miktex\tex\latex\multirow\multirow.sty" \bigstrutjot=\dimen132 ) -(D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.aux) +(D:\git\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.aux) LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 353. LaTeX Font Info: ... okay on input line 353. LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 353. @@ -356,8 +356,8 @@ LaTeX Info: Redefining \ref on input line 353. LaTeX Info: Redefining \pageref on input line 353. LaTeX Info: Redefining \nameref on input line 353. -(D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.out) -(D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.out) +(D:\git\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.out) +(D:\git\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.out) \@outlinefile=\write3 LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <14> not available (Font) Font shape `OT1/ptm/b/n' tried instead on input line 405. @@ -369,122 +369,207 @@ File: omsptm.fd ) LaTeX Font Info: Font shape `OMS/ptm/m/it' in size <10> not available (Font) Font shape `OMS/cmsy/m/n' tried instead on input line 405. -LaTeX Font Info: Try loading font information for U+msa on input line 432. +LaTeX Font Info: Try loading font information for U+msa on input line 434. ("D:\Program Files\miktex\tex\latex\amsfonts\umsa.fd" File: umsa.fd 2009/06/22 v3.00 AMS symbols A ) -LaTeX Font Info: Try loading font information for U+msb on input line 432. +LaTeX Font Info: Try loading font information for U+msb on input line 434. ("D:\Program Files\miktex\tex\latex\amsfonts\umsb.fd" File: umsb.fd 2009/06/22 v3.00 AMS symbols B ) -Overfull \hbox (1.25832pt too wide) in paragraph at lines 432--432 -[][][][][]$\OT1/ptm/m/n/8 http : / / asymco . 
com / 2011 / 11 / 17 / the-[]glob -al-[]smartphone-[]market-[]landscape$[]| +Underfull \hbox (badness 4726) in paragraph at lines 434--434 +[][][][]\OT1/ptm/m/n/8 The Global Smart-phone Mar-ket Land-scape: []$http : / / + www . asymco .$ [] + +Underfull \hbox (badness 10000) in paragraph at lines 434--434 +$\OT1/ptm/m/n/8 com / 2011 / 11 / 17 / the-[]global-[]smartphone-[]market-[]lan +dscape(retrievedMarch ,[]$ + [] + + +LaTeX Warning: Citation `usmarket' on page 1 undefined on input line 434. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ™ in font ptmr7t! + +LaTeX Warning: Citation `analysis' on page 1 undefined on input line 434. + + +LaTeX Warning: Citation `analysis' on page 1 undefined on input line 434. + + +LaTeX Warning: Citation `testing' on page 1 undefined on input line 437. + + +LaTeX Warning: Citation `Marcus04aninformation' on page 1 undefined on input li +ne 439. + + +LaTeX Warning: Citation `Asuncion:2010' on page 1 undefined on input line 439. + + +LaTeX Warning: Citation `Linstead:2009' on page 1 undefined on input line 439. + + +LaTeX Warning: Citation `labeledlda' on page 1 undefined on input line 439. + +[1{C:/Users/Ray/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map} + + +] + +LaTeX Warning: Citation `hindle9s' on page 2 undefined on input line 465. + + +LaTeX Warning: Citation `li2003topic' on page 2 undefined on input line 465. + + +LaTeX Warning: Citation `li2003topic' on page 2 undefined on input line 466. + + +Underfull \hbox (badness 1365) in paragraph at lines 465--467 +\OT1/ptm/m/n/10 tems (SCS) is use-ful in a va-ri-ety of text pro-cess-ing + [] + + +LaTeX Warning: Citation `ldawiki' on page 2 undefined on input line 468. + + +LaTeX Warning: Citation `labeledlda' on page 2 undefined on input line 470. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ™ in font ptmr7t! 
-Missing character: There is no â in font ptmr7t! -Missing character: There is no € in font ptmr7t! -Missing character: There is no ™ in font ptmr7t! -[1{C:/Users/Ray/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map} +LaTeX Warning: Citation `stmt' on page 2 undefined on input line 470. -] -Overfull \hbox (19.03108pt too wide) in paragraph at lines 520--546 + +LaTeX Warning: Citation `Marcus04aninformation' on page 2 undefined on input li +ne 481. + + +LaTeX Warning: Citation `Asuncion:2010' on page 2 undefined on input line 481. + + +LaTeX Warning: Citation `Linstead:2009' on page 2 undefined on input line 481. + + +LaTeX Warning: Citation `Thomas:2011' on page 2 undefined on input line 481. + + +LaTeX Warning: Citation `Asuncion:2010' on page 2 undefined on input line 483. + + +LaTeX Warning: Citation `Hindle' on page 2 undefined on input line 487. + + +LaTeX Warning: Citation `MSRChallenge2012' on page 2 undefined on input line 50 +3. + + +Overfull \hbox (19.03108pt too wide) in paragraph at lines 529--555 [][] [] -Underfull \hbox (badness 3417) in paragraph at lines 569--569 +Underfull \hbox (badness 3417) in paragraph at lines 578--578 [][][][]\OT1/ptm/m/n/8 Android Op-er-at-ing Sys-tem sum-mary: []$http : / / en . wikipedia . org / wiki /$ [] -Underfull \hbox (badness 10000) in paragraph at lines 575--575 +Underfull \hbox (badness 10000) in paragraph at lines 584--584 [][][][]\OT1/ptm/m/n/8 Android Com-par-i-son: []$http : / / en . wikipedia . or g / wiki / Comparison[]of[]$ [] [2] + +LaTeX Warning: Citation `Hindle2011' on page 3 undefined on input line 601. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ™ in font ptmr7t! +LaTeX Warning: Command \l invalid in math mode on input line 713. + + +LaTeX Warning: Command \l invalid in math mode on input line 715. -LaTeX Warning: Command \l invalid in math mode on input line 704. 
+LaTeX Warning: Command \l invalid in math mode on input line 715. -LaTeX Warning: Command \l invalid in math mode on input line 706. +LaTeX Warning: Command \l invalid in math mode on input line 718. -LaTeX Warning: Command \l invalid in math mode on input line 706. +LaTeX Warning: Command \l invalid in math mode on input line 718. -LaTeX Warning: Command \l invalid in math mode on input line 709. +LaTeX Warning: Citation `Thomas:2011' on page 3 undefined on input line 766. -LaTeX Warning: Command \l invalid in math mode on input line 709. + +LaTeX Warning: Citation `Hindle2011' on page 3 undefined on input line 766. + + +LaTeX Warning: Citation `Hindle' on page 3 undefined on input line 766. [3] -Overfull \hbox (15.64706pt too wide) in paragraph at lines 830--849 +Overfull \hbox (15.64706pt too wide) in paragraph at lines 839--858 [][] [] - + File: bugovertime.png Graphic file (type png) -Package pdftex.def Info: bugovertime.png used on input line 884. +Package pdftex.def Info: bugovertime.png used on input line 893. (pdftex.def) Requested size: 505.89pt x 227.56071pt. -[4] + +LaTeX Warning: Citation `historyofandroid' on page 4 undefined on input line 90 +2. + +[4] File: commontopic.png Graphic file (type png) -Package pdftex.def Info: commontopic.png used on input line 922. +Package pdftex.def Info: commontopic.png used on input line 925. (pdftex.def) Requested size: 505.89pt x 254.29144pt. -Underfull \vbox (badness 1831) has occurred while \output is active [] +Underfull \hbox (badness 1371) in paragraph at lines 933--936 +\OT1/ptm/m/n/10 ti-cal terms. That means they have the same bug re- + [] - +Underfull \vbox (badness 1831) has occurred while \output is active [] + + [5 ] + File: fixtopic.png Graphic file (type png) -Package pdftex.def Info: fixtopic.png used on input line 942. +Package pdftex.def Info: fixtopic.png used on input line 945. (pdftex.def) Requested size: 505.89pt x 190.85187pt. 
-Underfull \vbox (badness 5022) has occurred while \output is active [] - - [5 ] -Underfull \hbox (badness 5817) in paragraph at lines 951--952 -\OT1/ptm/m/n/10 and Mo-torola share many iden-ti-cal terms for wifi +Underfull \hbox (badness 2970) in paragraph at lines 954--955 +[]\OT1/ptm/m/it/10 Common Im-proved Top-ics \OT1/ptm/m/n/10 shown in Ta-ble []I +II[] for [] - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [6 ] +[6 ] + File: uniquehtc.png Graphic file (type png) - - -Package pdftex.def Info: uniquehtc.png used on input line 977. + +Package pdftex.def Info: uniquehtc.png used on input line 980. (pdftex.def) Requested size: 505.89pt x 70.02682pt. - -File: uniquemoto.png Graphic file (type png) - -Package pdftex.def Info: uniquemoto.png used on input line 984. + +File: uniquemoto.png Graphic file (type png) + +Package pdftex.def Info: uniquemoto.png used on input line 987. (pdftex.def) Requested size: 505.89pt x 70.02682pt. Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! @@ -504,105 +589,70 @@ Missing character: There is no Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no ť in font ptmr7t! - [7] -File: htcsim.png Graphic file (type png) - -Package pdftex.def Info: htcsim.png used on input line 1076. +[7 ] + +File: htcsim.png Graphic file (type png) + +Package pdftex.def Info: htcsim.png used on input line 1079. (pdftex.def) Requested size: 505.89pt x 196.72829pt. - -File: motosim.png Graphic file (type png) - -Package pdftex.def Info: motosim.png used on input line 1083. + +File: motosim.png Graphic file (type png) + +Package pdftex.def Info: motosim.png used on input line 1086. (pdftex.def) Requested size: 505.89pt x 198.38048pt. 
- -File: htcldallda.png Graphic file (type png) - -Package pdftex.def Info: htcldallda.png used on input line 1090. + +File: htcldallda.png Graphic file (type png) + +Package pdftex.def Info: htcldallda.png used on input line 1093. (pdftex.def) Requested size: 252.94499pt x 179.82805pt. -Overfull \hbox (9.03374pt too wide) in paragraph at lines 1090--1091 +Overfull \hbox (9.03374pt too wide) in paragraph at lines 1093--1094 [][] [] - + File: motoldallda.png Graphic file (type png) -Package pdftex.def Info: motoldallda.png used on input line 1097. +Package pdftex.def Info: motoldallda.png used on input line 1100. (pdftex.def) Requested size: 252.94499pt x 170.83348pt. -Overfull \hbox (9.03374pt too wide) in paragraph at lines 1097--1098 +Overfull \hbox (9.03374pt too wide) in paragraph at lines 1100--1101 [][] [] + +Overfull \hbox (3.45119pt too wide) in paragraph at lines 1132--1230 + [][] + [] + + +LaTeX Warning: Float too large for page by 24.45648pt on input line 1230. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! + +LaTeX Warning: Citation `MSRChallenge2012' on page 8 undefined on input line 12 +34. + Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! +[8 ] Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! Missing character: There is no â in font ptmr7t! Missing character: There is no € in font ptmr7t! Missing character: There is no “ in font ptmr7t! 
-[8 ] -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [9 ] -(D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.bbl -Underfull \hbox (badness 10000) in paragraph at lines 30--32 -[]\OT1/ptm/m/n/9 ``An Anal-y-sis of An-droid Frag-men-ta- - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 30--32 -\OT1/ptm/m/n/9 tion,'' []$http : / / www . tech-[]thoughts . net / 2012 / 03 /$ - - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 34--36 -[]\OT1/ptm/m/n/9 ``Zipline CEO:Stop whin-ing about An- - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 34--36 -\OT1/ptm/m/n/9 droid frag-men-ta-tion and do some damn - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 34--36 -\OT1/ptm/m/n/9 QA!'' []$http : / / thenextweb . com / google / 2012 / 04 / 02 / -$ - [] - - -Overfull \hbox (68.26578pt too wide) in paragraph at lines 34--36 -$\OT1/ptm/m/n/9 zipline-[]ceo-[]stop-[]whining-[]about-[]android-[]fragmentatio -n-[]and-[]do-[]some-[]damn-[]qa/$[]. - [] - - -Underfull \vbox (badness 1552) has occurred while \output is active [] - - [10] -Underfull \hbox (badness 10000) in paragraph at lines 92--94 -[]\OT1/ptm/m/n/9 ``A breif his-tory of An-droid,'' []$http : / /$ - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 92--94 -$\OT1/ptm/m/n/9 reviews . cnet . com / 8301-[]19736[]7-[]20016542-[]251 /$ - [] - -) + [9 ] +No file bare_conf_CS-beta1.1.bbl. ** Conference Paper ** Before submitting the final camera ready copy, remember to: @@ -614,30 +664,31 @@ Before submitting the final camera ready copy, remember to: uses only Type 1 fonts and that every step in the generation process uses the appropriate paper size. -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1368. -[11 +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1401. +[10 -] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 1368. 
- (D:\github\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1368. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1368. + ] [11] +Package atveryend Info: Empty hook `AfterLastShipout' on input line 1401. + +(D:\git\fragment_android\Paper_Latex\bare_conf_CS-beta1.1.aux) +Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1401. +Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1401. Package rerunfilecheck Info: File `bare_conf_CS-beta1.1.out' has not changed. -(rerunfilecheck) Checksum: 229DC3667C93D68FF7223FFCB19CE56C;1515. +(rerunfilecheck) Checksum: EC2F65B113282752CD367D8F08E7F8E3;1467. -LaTeX Warning: Label(s) may have changed. Rerun to get cross-references right. +LaTeX Warning: There were undefined references. ) Here is how much of TeX's memory you used: - 6575 strings out of 494045 - 97240 string characters out of 3145974 - 208596 words of memory out of 3000000 - 9722 multiletter control sequences out of 15000+200000 - 45422 words of font info for 95 fonts, out of 3000000 for 9000 + 6543 strings out of 494045 + 96645 string characters out of 3145974 + 214159 words of memory out of 3000000 + 9711 multiletter control sequences out of 15000+200000 + 42770 words of font info for 88 fonts, out of 3000000 for 9000 718 hyphenation exceptions out of 8191 - 29i,8n,28p,1181b,457s stack positions out of 5000i,500n,10000p,200000b,50000s + 29i,8n,28p,1327b,320s stack positions out of 5000i,500n,10000p,200000b,50000s {D:/Program Files/miktex/fonts/enc/dvips/fontname/8r.enc} -Output written on bare_conf_CS-beta1.1.pdf (11 pages, 662577 bytes). +Output written on bare_conf_CS-beta1.1.pdf (11 pages, 655680 bytes). PDF statistics: - 335 PDF objects out of 1000 (max. 8388607) - 65 named destinations out of 1000 (max. 500000) - 222 words of extra memory for PDF output out of 10000 (max. 
10000000) + 280 PDF objects out of 1000 (max. 8388607) + 51 named destinations out of 1000 (max. 500000) + 214 words of extra memory for PDF output out of 10000 (max. 10000000) diff --git a/Paper_Latex/bare_conf_CS-beta1.1.out b/Paper_Latex/bare_conf_CS-beta1.1.out index a380943..42ca398 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.out +++ b/Paper_Latex/bare_conf_CS-beta1.1.out @@ -19,4 +19,3 @@ \BOOKMARK [1][-]{section.7}{Comparing of LDA and Labeled-LDA}{}% 19 \BOOKMARK [1][-]{section.8}{Threats to validity}{}% 20 \BOOKMARK [1][-]{section.9}{Conclusion and Future Work}{}% 21 -\BOOKMARK [1][-]{section*.1}{References}{}% 22 diff --git a/Paper_Latex/bare_conf_CS-beta1.1.pdf b/Paper_Latex/bare_conf_CS-beta1.1.pdf index a949649..25cc32a 100644 Binary files a/Paper_Latex/bare_conf_CS-beta1.1.pdf and b/Paper_Latex/bare_conf_CS-beta1.1.pdf differ diff --git a/Paper_Latex/bare_conf_CS-beta1.1.synctex.gz b/Paper_Latex/bare_conf_CS-beta1.1.synctex.gz index e823b9a..9a7b36d 100644 Binary files a/Paper_Latex/bare_conf_CS-beta1.1.synctex.gz and b/Paper_Latex/bare_conf_CS-beta1.1.synctex.gz differ diff --git a/Paper_Latex/bare_conf_CS-beta1.1.tex b/Paper_Latex/bare_conf_CS-beta1.1.tex index b16aa83..78f3c9d 100644 --- a/Paper_Latex/bare_conf_CS-beta1.1.tex +++ b/Paper_Latex/bare_conf_CS-beta1.1.tex @@ -361,7 +361,7 @@ % use a multiple column layout for up to two different % affiliations -\author{\IEEEauthorblockN{Dan Han, Chenlei Zhang, Xiaochao Fan, Abram Hindle, Kenny Wong, and Eleni Stroulia} +\author{\IEEEauthorblockN{Dan Han, Chenlei Zhang, Xiaochao Fan, Abram Hindle, Kenny Wong and Eleni Stroulia} \IEEEauthorblockA{Department of Computing Science\\ University of Alberta \\ Edmonton, Canada\\ @@ -405,7 +405,7 @@ \begin{abstract} -Android fragmentation has been a controversial topic. In this study, we investigated the fragmentation of Android by a comparison of Android vendor's bug reports via topic analysis. 
We mined and analyzed the Android bug reports related to two popular Android vendors, HTC and Motorola. We manually annotated bug reports with labels and applied Labeled Latent Dirichlet Allocation (Labeled-LDA) to the datasets to produce bug topics. By comparing the distribution of average relevance of top 18 bug topics over time for both vendors, we categorized the topics into three types which are \textit{Common Troubled Topics}, \textit{Common Improved Topics} and \textit{Unique Topics}. The \textit{Common Troubled Topics} show that there is no correlation between the troubled features of Android and Android evolution. The \textit{Common Improved Topics} show that some features within the same vendors have portability issues across their multiple devices. The \textit{Unique Topics} show that different vendors have specific bug topics which imply there may be the portability problem on the different vendors with the same Android version. Our findings can be used by Android system community, stakeholders, Android device vendors and developers to make project dashboards, process investigation and feature analysis. +Android fragmentation has been a controversial topic. In this study, we investigated the fragmentation of Android by a comparison of Android vendor's bug reports via topic analysis. We mined and analyzed the Android bug reports related to two popular Android vendors, HTC and Motorola. We manually annotated bug reports with labels and applied Labeled Latent Dirichlet Allocation (Labeled-LDA) to the datasets to produce bug topics. By comparing the distribution of average relevance of top 18 bug topics over time for both vendors, we categorized the topics into three types which are \textit{Common Troubled Topics}, \textit{Common Improved Topics} and \textit{Unique Topics}. The \textit{Common Troubled Topics} show that there is no correlation between the troubled features of Android and Android evolution. 
The \textit{Common Improved Topics} show that some features within the same vendors have portability issues across their multiple devices. The \textit{Unique Topics} show that different vendors have specific bug topics which imply there may be the portability problem on the different vendors. Our findings can be used by Android system community, stakeholders, Android device vendors and developers to make project dashboards, process investigation and feature analysis. @@ -436,9 +436,9 @@ \section{Introduction} Android fragmentation has been a controversial topic which swells up now and again regrading its provenance and its impacts. However, no one can provide strong evidences to support their statements. Someone from industry performed experiments of Android on different devices, and they found that the root cause of fragmentation is the classical software engineering issues \cite{testing}. -In this study, we want to explore the fragmentation of Android by mining and analyzing Android user bug reports. We applied topic analysis on the Android bug reports. A topic of the document (e.g. bug reports, source code changes and commits) is generated by topic models which has been used to help understand software systems. There are a few topic models utilized by researchers in software engineering, e.g. Latent Dirichlet Allocation (LDA), Latent Semantic Index (LSI) and Labeled Latent Dirichlet Allocation (Labeled-LDA) \cite{Marcus04aninformation} \cite{Asuncion:2010} \cite{Linstead:2009}. We applied labeled-LDA\cite{labeledlda} on bug reports of different vendors and analyzed topics in bug reports. We then did the analysis on the bug topics and based on the topics analysis, we discussed what features of Android contribute much on Android fragmentation in the end. +In this study, we want to explore the fragmentation of Android by mining and analyzing Android user bug reports. We applied topic analysis on the Android bug reports. A topic of the document (e.g. 
bug reports, source code changes and commits) is generated by topic models which has been used to help understand software systems. There are a few topic models utilized by researchers in software engineering, e.g. Latent Dirichlet Allocation (LDA), Latent Semantic Index (LSI) and Labeled Latent Dirichlet Allocation (Labeled-LDA) \cite{Marcus04aninformation}, \cite{Asuncion:2010}, \cite{Linstead:2009}. We applied labeled-LDA \cite{labeledlda} on bug reports of different vendors and analyzed topics in bug reports. We then did the analysis on the bug topics and based on the topics analysis, we discussed what features of Android contribute much on Android fragmentation in the end. -In terms of bug reports, we chose the bug reports of HTC and Motorola in this study. HTC’s first Android phone was the HTC Dream manufactured in Oct. 2008. HTC has made more than thirty different Android phones since then. Motorola made their first Android phone in Oct. 2009 and has released more than twenty different Android phones since then. Their Android products have gained widespread popularity. +In terms of bug reports, we chose the bug reports of HTC and Motorola in this study. HTC's first Android phone was the HTC Dream manufactured in Oct. 2008. HTC has made more than thirty different Android phones since then. Motorola made their first Android phone in Oct. 2009 and has released more than twenty different Android phones since then. Their Android products have gained widespread popularity. This paper makes the following contributions: \begin{itemize} @@ -462,12 +462,12 @@ \section{Introduction} %Subsubsection text here. \section{Background} -Topic analysis, with respect to Software Control Systems(SCS) is extremely useful in a variety of text processing applications\cite{hindle9s}. It includes two main steps: topic identification and text segmentation \cite{li2003topic}. It can be used in indexing the texts automatically to retrieve information. 
-With it, we can understand what the main topics and sets of associated words with these topics, and where those associated words lie within the text \cite{li2003topic}. Recent topic analysis technologies include Latent Dirichlet Allocation (LDA) and Labeled LDA. +Topic analysis, with respect to Software Control Systems (SCS), is useful in a variety of text processing applications\cite{hindle9s}. It includes two main steps: topic identification and text segmentation \cite{li2003topic}. It can be used in indexing the texts automatically to retrieve information. +With it, we can understand what the main topics and sets of associated words with these topics, and where those associated words lie within the text \cite{li2003topic}. Recent topic analysis technologies include Latent Dirichlet Allocation (LDA) and Labeled-LDA. -Latent Dirichlet allocation (LDA) is an unsupervised topic model to credit text documents as mixtures of latent topics, where topics correspond to key word lists presented in the corpus \cite{ldawiki}. It has been successfully used in the software engineering area for mining and retrievng informations from large text corpora. +LDA is an unsupervised topic model to credit text documents as mixtures of latent topics, where topics correspond to key word lists presented in the corpus \cite{ldawiki}. It has been successfully used in the software engineering area for mining and retrieving information from large text corpora. -In our research, we apply Labeled-LDA to perform topic analysis. Labeled-LDA is a supervised topic model for credit attribution in multi-labeled corpora\cite{labeledlda}. It defines a one-to-one mapping between LDA’s latent topics and tags labeled by users. In other words, Labeled LDA incorporates the multiple tags into the topics learning process and only builds topics around these tags, which is quite different from LDA.
LDA, as a totally unsupervised algorithm, automatically learns a set of terms for each topic on a corpus without any constraints. To apply Labeled LDA, we utilize the Stanford Topic Modeling Toolbox (STMT)\cite{stmt}. +In our research, we apply Labeled-LDA to perform topic analysis. Labeled-LDA is a supervised topic model for credit attribution in multi-labeled corpora \cite{labeledlda}. It defines a one-to-one mapping between LDA’s latent topics and tags labeled by users. In other words, labeled-LDA incorporates the multiple tags into the topics learning process and only builds topics around these tags, which is quite different from LDA. LDA, as a totally unsupervised algorithm, automatically learns a set of terms for each topic on a corpus without any constraints. To apply labeled-LDA, we utilize the Stanford Topic Modeling Toolbox (STMT) \cite{stmt}. %\begin{figure*}[htb] %\centering @@ -480,13 +480,11 @@ \section{Background} \section{Related Work} Topic models have been used to help understand software systems. Marcus et al.\cite{Marcus04aninformation} used Latent Semantic Indexing (LSI) on both source code and user queries and then identified the most relevant source code documents with similarity measurements. Asuncion et al.\cite{Asuncion:2010} applied a coherence measurement on topics learned by LDA to model the quality of bug reports. Linstead et al.\cite{Linstead:2009} performed LDA to generate traceability links for artifacts in software projects automatically. Topic modeling is also utilized by Thomas et al.\cite{Thomas:2011} to study the evolution of topics in software projects. -Compared with all these approaches, the most important difference is the topic models. They used LDA to extract topics, while we used Labeled-LDA to obtain the topics. With LDA, they prefined the number of topics and interpreted the extracted topics to get the extracted topics\cite{Asuncion:2010}. In our work, we first manually labeled bug reports with multiple labels. 
Then we employed labeled-LDA to get the topics. Another difference is that there is some manual work in our study. - -to overcome the disadvantages of these unsupervised algorithms by pre-defining the number of topics and interpreting the extracted topics +Compared with all these approaches, the most important difference is the topic models. They used LDA to extract topics, while we used Labeled-LDA to obtain the topics. With LDA, they predefined the number of topics and interpreted the extracted topics to get the extracted topics\cite{Asuncion:2010}. In our work, we first manually labeled bug reports with multiple labels. Then we employed labeled-LDA to get the topics. Another difference is that there is some manual work in our study to overcome the disadvantages of these unsupervised algorithms by pre-defining the number of topics and interpreting the extracted topics. \section{Methodology} -Our methodology is to extract bug reports, assign multiple labels to each of them and then apply Labeled LDA on the labeled data. After that we calculate the average relevance of bug reports to each label over time\cite{Hindle} and compare them between two Android vendors, HTC and Motorola. In order to compare the performance between LDA and Labeled LDA, we also apply LDA on the extracted bug reports of HTC and Motorola without our manual labels. We label all the topics generated by LDA. For each vendor, we calculate the similarity of each pair of labels from LDA and Labeled LDA to evaluate their performance. +Our methodology is to extract bug reports, assign multiple labels to each of them and then apply labeled-LDA on the labeled data. After that we calculate the average relevance of bug reports to each label over time\cite{Hindle} and compare them between two Android vendors, HTC and Motorola. In order to compare the performance between LDA and labeled-LDA, we also apply LDA on the extracted bug reports of HTC and Motorola without our manual labels.
We label all the topics generated by LDA. For each vendor, we calculate the similarity of each pair of labels from LDA and labeled-LDA to evaluate their performance. \subsection{Generating the data} @@ -668,11 +666,11 @@ \subsection{Labelling the HTC and Motorola Bug Reports} Table \ref{selected1} lists all the manual labels from bug reports of HTC and Motorola. -% XXX TODO Labeled LDA to Labeled-LDA +% XXX TODO labeled-LDA to Labeled-LDA \subsection{Applying Labeled-LDA} -% 4. Apply Labeled LDA:We applied the Labeled-LDA tool, Stanford Topic +% 4. Apply labeled-LDA:We applied the Labeled-LDA tool, Stanford Topic % Modeling Toobox [http://nlp.stanford.edu/software/tmt/tmt-0.4/], to % get the topic-document distribution on our labeled bug reports. @@ -699,7 +697,7 @@ \subsection{Applying Labeled-LDA} with our label, as well as a document-topic matrix which links our labels to the documents in the each bug report corpus (HTC and Motorola). -%By applying Labeled LDA to the bug reports of HTC and Motorola +%By applying labeled-LDA to the bug reports of HTC and Motorola %separately, we have the word distribution of each label and a matrix %that provides the relationship between bug reports and the labels. @@ -778,10 +776,10 @@ \subsection{Applying LDA} \subsection{Comparing the Effort to Use LDA and Labeled-LDA} -% 8. Comparison of LDA and Labeled LDA: For each pair of topics in LDA -% and Labeled LDA, we computed the their similarity based on the +% 8. Comparison of LDA and labeled-LDA: For each pair of topics in LDA +% and labeled-LDA, we computed the their similarity based on the % topic-document distribution. That is the Jaccard similarity of the two -% sets. One is from LDA and the other one is from Labeled LDA. Each set +% sets. One is from LDA and the other one is from labeled-LDA. Each set % is the bug reports that have relevance to that label. We chose several % thresholds on the relevance. 
That is if the relevance is under the % threshold, this bug report is not related to that label. At last we @@ -792,11 +790,11 @@ \subsection{Comparing the Effort to Use LDA and Labeled-LDA} Labeled-LDA we had to compare the results. % Thus nce LDA and Labelled-LDA were applied to the bug reports of HTC and % Motorola we had to compare the topics that were extracted. -Both LDA and Labeled LDA produce matrices of +Both LDA and labeled-LDA produce matrices of the relationship between bug reports of two vendors and the label or topics. That is if the topics generated by LDA that were labeled as the same -ones in Labeled LDA would be related to similar bug reports. +ones in labeled-LDA would be related to similar bug reports. We determined topic similarity by comparing the sets of documents relevant to a LDA topic and those relevant to a Labeled-LDA @@ -804,9 +802,9 @@ \subsection{Comparing the Effort to Use LDA and Labeled-LDA} topic we did pair-wise similarity comparisons. We applied the Jaccard similarity coefficient to compute the -similarity between each topic in LDA and each label in Labeled LDA. +similarity between each topic in LDA and each label in labeled-LDA. That is, the Jaccard similarity coefficient between label A in LDA and -label B in Labeled LDA is the ratio of the intersection of bug reports +label B in labeled-LDA is the ratio of the intersection of bug reports related to label A and label B to the union of the bug reports related to label A and label B, \begin{equation} @@ -869,25 +867,25 @@ \section{Topic Mining and Analysis} %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{htcldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. 
The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{motoldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{htcratiosim.png} -%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{motoratiosim.png} -%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. 
The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} \begin{figure*}[htb] @@ -1020,25 +1018,25 @@ \section{Discussion of Fragmentation} %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{htcldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[!htb] %\centering %\includegraphics[width=0.4\textwidth]{motoldallda.png} -%\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +%\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{htcratiosim.png} -%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} % %\begin{figure}[htb] %\centering %\includegraphics[width=0.4\textwidth]{motoratiosim.png} -%\caption{The comparison of ratio and similarity in Motorola. 
The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} %\end{figure} @@ -1079,47 +1077,47 @@ \section{Comparing of LDA and Labeled-LDA} \begin{figure*}[htb] \centering \includegraphics[width=1\textwidth]{htcsim.png} -\caption{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.} +\caption{Jaccard similarity of labels between LDA and Labeled-LDA in HTC. X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the HTC bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.} \label{similarityhtc} \end{figure*} \begin{figure*}[htb] \centering \includegraphics[width=1\textwidth]{motosim.png} -\caption{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. X axis is the labels in Labeled LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.} +\caption{Jaccard similarity of labels between LDA and Labeled-LDA in Motorola. 
X axis is the labels in labeled-LDA and Y axis is the labels of topics generated by LDA. The label ``null" in the Y axis means that topic cannot be labeled. The result is based on the Motorola bug reports under the threshold of document relevance of 0.2. Brighter means higher Jaccard similarity.} \label{similaritymoto} \end{figure*} \begin{figure}[htb] \centering \includegraphics[width=0.5\textwidth]{htcldallda.png} -\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in HTC. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in HTC. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} \label{bughtc} \end{figure} \begin{figure}[!htb] \centering \includegraphics[width=0.5\textwidth]{motoldallda.png} -\caption{Comparison of number of bug reports related to the same labels from LDA and Labeled LDA in Motorola. The X axis is the same labels from LDA and Labeled LDA and the Y axis is the number of bug reports.} +\caption{Comparison of number of bug reports related to the same labels from LDA and labeled-LDA in Motorola. The X axis is the same labels from LDA and labeled-LDA and the Y axis is the number of bug reports.} \label{bugmoto} \end{figure} -%Most of the same labels from LDA and Labeled LDA have the comparable amount of bug reports. For example, the label ``calling'' from the HTC bug reports has exactly the same number of bugs related to for both results of LDA and Labeled LDA. However, the similarity of these related bug reports in terms of this label ``calling'' is very low which means LDA and Labeled LDA related quite different bug reports to this label. When doing this comparison, we cannot ignore the number of bugs that related to each label from both two techniques. 
That is, for one label, the ratio (the smaller number is divided by the bigger number so the ratio is always less or equal to one) of the number of bug reports related to this label predicted by LDA to that of Labeled LDA would be the upper bound of the similarity value. From Figure \ref{bughtc} and Figure \ref{bugmoto} that the relation between topics and each bug report modeled by LDA is quite different from the results generated by Labeled LDA. +%Most of the same labels from LDA and labeled-LDA have the comparable amount of bug reports. For example, the label ``calling'' from the HTC bug reports has exactly the same number of bugs related to for both results of LDA and Labeled LDA. However, the similarity of these related bug reports in terms of this label ``calling'' is very low which means LDA and Labeled LDA related quite different bug reports to this label. When doing this comparison, we cannot ignore the number of bugs that related to each label from both two techniques. That is, for one label, the ratio (the smaller number is divided by the bigger number so the ratio is always less or equal to one) of the number of bug reports related to this label predicted by LDA to that of Labeled LDA would be the upper bound of the similarity value. From Figure \ref{bughtc} and Figure \ref{bugmoto} that the relation between topics and each bug report modeled by LDA is quite different from the results generated by Labeled LDA. %The similarity values for these labels in Figure \ref{similaritymoto} are quite low compared with the ratio. Only about ten labels in HTC have similarity values that are larger than half of the ratio. For Motorola, the similarity values are all very low compared with the upper bound of the similarity values. -We can conclude that only few of the bug reports in HTC and Motorola are predicted by LDA and Labeled LDA to be related to the same labels. 
In other words, the relation between topics and each bug report modeled by LDA is quite different from the results generated by Labeled-LDA. We think the manual efforts of labeling all the bug reports would help us gain the better topic models generated by Labeled-LDA. +We can conclude that only few of the bug reports in HTC and Motorola are predicted by LDA and labeled-LDA to be related to the same labels. In other words, the relation between topics and each bug report modeled by LDA is quite different from the results generated by Labeled-LDA. We think the manual efforts of labeling all the bug reports would help us gain the better topic models generated by Labeled-LDA. %\begin{figure}[htb] %\centering %\includegraphics[width=0.5\textwidth]{htcratiosim.png} -%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in HTC. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} %\begin{figure}[htb] %\centering %\includegraphics[width=0.5\textwidth]{motoratiosim.png} -%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or Labeled LDA divided by the larger one is the ratio of this label. The X axis is the same labels from LDA and Labeled LDA.} +%\caption{The comparison of ratio and similarity in Motorola. The result of the smaller number of bug reports related to this label in LDA or labeled-LDA divided by the larger one is the ratio of this label. 
The X axis is the same labels from LDA and labeled-LDA.} %\end{figure} @@ -1244,9 +1242,9 @@ \section{Threats to validity} \section{Conclusion and Future Work} -In this paper we studied Android bug reports for two vendors, HTC and Motorola. Based on topic analysis using Labeled-LDA on a corpus of manually tagged bug reports with multiple labels, we extracted the top 18 topics and categorized them into \textit{Common Troubled Topics}, \textit{Common Improved Topics} and \textif{Unique Topics} for both vendors. The \textit{Common Troubled Topics} show that there is no correlation between the troubled features of Android and Android evolution. In other words, there may be the incompatibility problem existing to the specific features of Android. The \textit{Common Improved Topics} show that some features within the same vendors have portability issues across their multiple devices. The \textit{Unique Topics} show that different vendor has specific bug topics which imply there may be the portability problem on the different vendors with the same Android version. Furthermore, we found that the manual efforts of labeling all the bug reports would help us gain the better topic models generated by Labeled-LDA after comparing LDA and Label-LDA. +In this paper we studied Android bug reports for two vendors, HTC and Motorola. Based on topic analysis using Labeled-LDA on a corpus of manually tagged bug reports with multiple labels, we extracted the top 18 topics and categorized them into \textit{Common Troubled Topics}, \textit{Common Improved Topics} and \textit{Unique Topics} for both vendors. The \textit{Common Troubled Topics} show that there is no correlation between the troubled features of Android and Android evolution. In other words, there may be the incompatibility problems existing to the specific features of Android. The \textit{Common Improved Topics} show that some features within the same vendors have portability issues across their multiple devices. 
The \textit{Unique Topics} show that different vendors have specific bug topics which imply there may be the portability problem on the different vendors. Furthermore, we found that the manual efforts of labeling all the bug reports would help us gain the better topic models generated by Labeled-LDA after comparing the topic models generated by LDA and Labeled-LDA. -For our future work, we will use the name of each hardware model as a label to do topic analysis while applying our methodology in order to discover the effects of different Android versions with respect to compatibility and stability. We will plan to investigate more vendors in order to reveal vendor specific bug topics. +For our future work, we plan to use the name of each hardware model and Android versions as the labels to do topic analysis while applying our methodology in order to discover the effects of different Android versions with respect to compatibility and stability. We also plan to investigate more vendors in order to reveal vendor-specific bug topics. %\subsubsection{Multi-labeling} %Subsubsection text here.