diff --git a/.gitignore b/.gitignore index ce64169..a754343 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -doc.yaml *.swp *.rbc coverage @@ -10,4 +9,21 @@ doc examples/images/* examples/*.html web/upload_task.rb -.idea + +# From standard gemfile +*.gem +.bundle +.config +.yardoc +Gemfile.lock +InstalledFiles +_yardoc +coverage +doc/ +lib/bundler/man +pkg +rdoc +spec/reports +test/tmp +test/version_tmp +tmp diff --git a/Gemfile b/Gemfile index ea8fc56..b42b1ad 100644 --- a/Gemfile +++ b/Gemfile @@ -1,18 +1,3 @@ source "https://www.rubygems.org" -gem 'minitest' -gem 'rdoc' -gem 'mocha', '0.14.0' #:require=>'mocha/setup' -gem 'shoulda','3.5.0' -gem 'shoulda-matchers','2.2.0' -gem 'hoe' -#gem 'bio-statsample-timeseries' -gem 'reportbuilder' -gem 'dirty-memoize' -gem 'distribution' -gem 'extendmatrix' -gem 'minimization' -gem 'rserve-client' -gem 'rubyvis' -gem 'spreadsheet' -gem 'rb-gsl' -gem 'awesome_print' + +gemspec diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index ef5d88d..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,81 +0,0 @@ -GEM - remote: https://www.rubygems.org/ - specs: - activesupport (4.1.6) - i18n (~> 0.6, >= 0.6.9) - json (~> 1.7, >= 1.7.7) - minitest (~> 5.1) - thread_safe (~> 0.1) - tzinfo (~> 1.1) - awesome_print (1.2.0) - clbustos-rtf (0.4.2) - dirty-memoize (0.0.4) - distribution (0.7.1) - extendmatrix (0.3.1) - hoe (3.13.0) - rake (>= 0.8, < 11.0) - i18n (0.6.11) - json (1.8.1) - metaclass (0.0.4) - minimization (0.2.1) - rb-gsl (~> 1.2) - text-table (~> 1.2) - minitest (5.4.2) - mocha (0.14.0) - metaclass (~> 0.0.1) - narray (0.6.0.9) - prawn (0.8.4) - prawn-core (>= 0.8.4, < 0.9) - prawn-layout (>= 0.8.4, < 0.9) - prawn-security (>= 0.8.4, < 0.9) - prawn-core (0.8.4) - prawn-layout (0.8.4) - prawn-security (0.8.4) - prawn-svg (0.9.1.11) - prawn (>= 0.8.4) - rake (10.3.2) - rb-gsl (1.16.0.2) - narray (>= 0.5.9) - rdoc (4.1.2) - json (~> 1.4) - reportbuilder (1.4.2) - clbustos-rtf (~> 0.4.0) - prawn (~> 0.8.4) - prawn-svg (~> 0.9.1) - text-table (~> 1.2) - rserve-client (0.3.1) - ruby-ole (1.2.11.7) - rubyvis (0.6.1) - shoulda (3.5.0) - shoulda-context (~> 1.0, >= 1.0.1) - shoulda-matchers (>= 1.4.1, < 3.0) - shoulda-context (1.2.1) - shoulda-matchers (2.2.0) - activesupport (>= 3.0.0) - spreadsheet (1.0.0) - ruby-ole (>= 1.0) - text-table (1.2.3) - thread_safe (0.3.4) - tzinfo (1.2.2) - thread_safe (~> 0.1) - -PLATFORMS - ruby - -DEPENDENCIES - awesome_print - dirty-memoize - distribution - extendmatrix - hoe - minimization - minitest - mocha (= 0.14.0) - rb-gsl - rdoc - reportbuilder - rserve-client - rubyvis - shoulda (= 3.5.0) - shoulda-matchers (= 2.2.0) - spreadsheet diff --git a/Rakefile b/Rakefile index d4e23b9..8b483ab 100644 --- a/Rakefile +++ b/Rakefile @@ -7,6 +7,7 @@ require 'rubygems' require 'statsample' require 'hoe' require 'rdoc' +require "bundler/gem_tasks" Hoe.plugin :git Hoe.plugin :doofus diff --git a/examples/correlation_matrix.rb b/examples/correlation_matrix.rb index 844e859..6feb065 100644 --- a/examples/correlation_matrix.rb +++ b/examples/correlation_matrix.rb @@ -11,6 +11,12 @@ 'd'=>rnorm(samples)) cm=cor(ds) summary(cm) + + cp = corp(ds) + summary(cp) + + cov = cov(ds) + summary(cov) end if __FILE__==$0 diff --git a/lib/statsample/bivariate.rb b/lib/statsample/bivariate.rb index d24e5ff..5115a16 100644 --- a/lib/statsample/bivariate.rb +++ b/lib/statsample/bivariate.rb @@ -71,6 +71,12 @@ def t_pearson(v1,v2) # giving r and vector size # Source : http://faculty.chass.ncsu.edu/garson/PA765/correl.htm def t_r(r,size) + raise "In computing value for t test for a pearson correlation, invalid size of series: #{size}" if size <= 2 + raise "In computing value for t test for a pearson correlation, invalid value of r: #{r}" if r >= 1.1 + if r >= 1 + puts "StatSample::Bivariate#t_r: got an R value > 1 (#{r}), so substituting 0.999999" + r = 0.999999 + end r * Math::sqrt(((size)-2).to_f / (1 - r**2)) end # Retrieves the probability value (a la SPSS) @@ -251,7 +257,11 @@ def correlation_probability_matrix(ds, tails=:both) (row==col or ds[row].type!=:scale or ds[col].type!=:scale) ? nil : prop_pearson(t_pearson(ds[row],ds[col]), v1a.size, tails) end end - Matrix.rows(rows) + m = Matrix.rows(rows) + m.extend(Statsample::CovariateMatrix) + m.fields=ds.fields + m.name = "Correlation Probability" + m end # Spearman ranked correlation coefficient (rho) between 2 vectors diff --git a/lib/statsample/matrix.rb b/lib/statsample/matrix.rb index 662bd0a..3f2eea2 100644 --- a/lib/statsample/matrix.rb +++ b/lib/statsample/matrix.rb @@ -27,14 +27,14 @@ def to_dataset if defined? :eigenpairs alias_method :eigenpairs_ruby, :eigenpairs end - + if Statsample.has_gsl? # Optimize eigenpairs of extendmatrix module using gsl def eigenpairs to_gsl.eigenpairs end end - + def eigenvalues eigenpairs.collect {|v| v[0]} end @@ -44,11 +44,11 @@ def eigenvectors def eigenvectors_matrix Matrix.columns(eigenvectors) end - - - - + + + + def to_gsl out=[] self.row_size.times{|i| @@ -76,7 +76,7 @@ class Matrix def to_gsl self end - + def to_dataset f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i| _("VAR_%d") % (i+1) } ds=Statsample::Dataset.new(f) @@ -91,7 +91,7 @@ def to_dataset ds.name=self.name if self.respond_to? :name ds end - + def row_size size1 end @@ -110,18 +110,18 @@ def eigenvalues def eigenvectors eigenpairs.collect {|v| v[1]} end - + # Matrix sum of squares def mssq sum=0 to_v.each {|i| sum+=i**2} sum end - + def eigenvectors_matrix eigval, eigvec= GSL::Eigen.symmv(self) GSL::Eigen::symmv_sort(eigval, eigvec, GSL::Eigen::SORT_VAL_DESC) - eigvec + eigvec end def eigenpairs eigval, eigvec= GSL::Eigen.symmv(self) @@ -130,7 +130,7 @@ def eigenpairs [eigval[i],eigvec.get_col(i)] } end - + #def eigenpairs_ruby # self.to_matrix.eigenpairs_ruby #end @@ -158,7 +158,7 @@ def total_sum module Statsample # Module to add names to X and Y fields module NamedMatrix - include Summarizable + include Summarizable def fields raise "Should be square" if !square? @@ -178,10 +178,10 @@ def fields_y=(v) @fields_y=v end def fields_x - @fields_x||=row_size.times.collect {|i| _("X%d") % i} + @fields_x||=row_size.times.collect {|i| _("X%d") % i} end def fields_y - @fields_y||=column_size.times.collect {|i| _("Y%d") % i} + @fields_y||=column_size.times.collect {|i| _("Y%d") % i} end def name @@ -195,7 +195,7 @@ def get_new_name @@named_matrix+=1 _("Matrix %d") % @@named_matrix end - + end # Module to add method for variance/covariance and correlation matrices # == Usage @@ -209,15 +209,19 @@ module CovariateMatrix # Get type of covariate matrix. Could be :covariance or :correlation def _type if row_size==column_size - if row_size.times.find {|i| self[i,i]!=1.0} - :covariance - else - :correlation + @type ||= begin + if row_size.times.find { |i| self[i, i] == 1.0 } + :correlation + elsif row_size.times.find { |i| self[i, i].nil? } + :correlation_probability + else + :covariance + end end else @type end - + end def _type=(t) @type=t @@ -233,7 +237,7 @@ def correlation end } }) - matrix.extend CovariateMatrix + matrix.extend CovariateMatrix matrix.fields_x=fields_x matrix.fields_y=fields_y matrix._type=:correlation @@ -242,19 +246,19 @@ def correlation self end end - - + + # Get variance for field k # def variance(k) submatrix([k])[0,0] end - + def get_new_name @@covariatematrix+=1 _("Covariate matrix %d") % @@covariatematrix end - + # Select a submatrix of factors. If you have a correlation matrix # with a, b and c, you could obtain a submatrix of correlations of # a and b, b and c or a and b @@ -276,24 +280,24 @@ def submatrix(rows,columns=nil) raise ArgumentError, "rows shouldn't be empty" if rows.respond_to? :size and rows.size==0 columns||=rows # Convert all fields on index - row_index=rows.collect {|v| + row_index=rows.collect {|v| r=v.is_a?(Numeric) ? v : fields_x.index(v) raise "Index #{v} doesn't exists on matrix" if r.nil? r } - column_index=columns.collect {|v| + column_index=columns.collect {|v| r=v.is_a?(Numeric) ? v : fields_y.index(v) raise "Index #{v} doesn't exists on matrix" if r.nil? r } - - + + fx=row_index.collect {|v| fields_x[v]} fy=column_index.collect {|v| fields_y[v]} - + matrix= Matrix.rows(row_index.collect {|i| row=column_index.collect {|j| self[i,j]}}) - matrix.extend CovariateMatrix + matrix.extend CovariateMatrix matrix.fields_x=fx matrix.fields_y=fy matrix._type=_type diff --git a/lib/statsample/shorthand.rb b/lib/statsample/shorthand.rb index d4956f3..aeb3a48 100644 --- a/lib/statsample/shorthand.rb +++ b/lib/statsample/shorthand.rb @@ -40,9 +40,15 @@ def names(ds) def cor(ds) Statsample::Bivariate.correlation_matrix(ds) end + + # Create a correlation probability matrix from a dataset + def corp(ds) + Statsample::Bivariate.correlation_probability_matrix(ds) + end + # Create a variance/covariance matrix from a dataset def cov(ds) - Statsample::Bivariate.covariate_matrix(ds) + Statsample::Bivariate.covariance_matrix(ds) end # Create a Statsample::Vector # Analog to R's c diff --git a/lib/statsample/version.rb b/lib/statsample/version.rb index 4da66f2..ca7ab9a 100644 --- a/lib/statsample/version.rb +++ b/lib/statsample/version.rb @@ -1,3 +1,3 @@ module Statsample - VERSION = '1.4.0' + VERSION = '1.4.1' end diff --git a/statsample.gemspec b/statsample.gemspec new file mode 100644 index 0000000..712f6dd --- /dev/null +++ b/statsample.gemspec @@ -0,0 +1,35 @@ +# coding: utf-8 +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'statsample/version' + +Gem::Specification.new do |spec| + spec.name = 'statsample' + spec.version = Statsample::VERSION + spec.summary = "Stats library" + spec.authors = ["Claudio Bustos", "Justin Gordon", "Russell Smith"] + spec.homepage = 'https://github.com/clbustos/statsample' + spec.license = "MIT" + spec.files = `git ls-files -z`.split("\x0") + spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } + spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) + spec.require_paths = ["lib"] + + spec.add_development_dependency "bundler", "~> 1.5" + spec.add_development_dependency "rake" + spec.add_development_dependency 'minitest' + spec.add_development_dependency 'rdoc' + spec.add_development_dependency 'mocha', '0.14.0' #:require=>'mocha/setup' + spec.add_development_dependency 'shoulda', '3.5.0' + spec.add_development_dependency 'shoulda-matchers', '2.2.0' + + spec.add_dependency 'reportbuilder' + spec.add_dependency 'dirty-memoize' + spec.add_dependency 'distribution' + spec.add_dependency 'extendmatrix' + spec.add_dependency 'minimization' + spec.add_dependency 'rserve-client' + spec.add_dependency 'rubyvis' + spec.add_dependency 'spreadsheet' + spec.add_dependency 'rb-gsl' +end