diff --git a/Gemfile b/Gemfile
index 1f7d997..e40b817 100644
--- a/Gemfile
+++ b/Gemfile
@@ -44,7 +44,7 @@
 gem 'jquery-rails'
 # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder
 gem 'pry'
-
+gem 'pry-rails'
 # Use ActiveModel has_secure_password
 # gem 'bcrypt-ruby', '~> 3.1.2'
 
diff --git a/Gemfile.lock b/Gemfile.lock
index 4e6a5ee..ac2645a 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -165,6 +165,8 @@ GEM
       coderay (~> 1.1.0)
       method_source (~> 0.8.1)
       slop (~> 3.4)
+    pry-rails (0.3.4)
+      pry (>= 0.9.10)
     rack (1.6.4)
     rack-cors (0.3.1)
     rack-test (0.6.3)
@@ -279,6 +281,7 @@ DEPENDENCIES
   open_uri_redirections
   pg
   pry
+  pry-rails
   rack-cors
   rails (= 4.2.3)
   rails-api
diff --git a/app/models/tool.rb b/app/models/tool.rb
index c5a4ddb..6c73239 100644
--- a/app/models/tool.rb
+++ b/app/models/tool.rb
@@ -13,6 +13,7 @@ class Tool < ActiveRecord::Base
   after_save :invalidate_cache
   has_and_belongs_to_many :users
   has_many :citations
+  has_many :tool_versions
   validates_uniqueness_of :url
   validates_presence_of :url
   validates_presence_of :name
@@ -54,8 +55,9 @@ def check_health
       when :github
         contents = JSON.parse RestClient.get "https://api.github.com/repos/#{repo_name}/contents",
           {:params =>
-            {:client_id => ENV['ST_GITHUB_CLIENT_ID'],
-             'client_secret' => ENV['ST_GITHUB_CLIENT_SECRET']
+            {
+              :client_id => ENV["GITHUB_CLIENT_ID"],
+              :client_secret => ENV["GITHUB_CLIENT_SECRET"]
             }
           }
         path_key = 'name'
diff --git a/app/models/tool_version.rb b/app/models/tool_version.rb
new file mode 100644
index 0000000..f50258c
--- /dev/null
+++ b/app/models/tool_version.rb
@@ -0,0 +1,3 @@
+class ToolVersion < ActiveRecord::Base
+  belongs_to :tool
+end
diff --git a/db/migrate/20150705173334_create_tool_versions.rb b/db/migrate/20150705173334_create_tool_versions.rb
new file mode 100644
index 0000000..dd0d25c
--- /dev/null
+++ b/db/migrate/20150705173334_create_tool_versions.rb
@@ -0,0 +1,10 @@
+class CreateToolVersions < ActiveRecord::Migration
+  def change
+    create_table :tool_versions do |t|
+      t.integer :tool_id
+      t.string :url
+
+      t.timestamps null: false
+    end
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index f3d56e3..e7ed4bb 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,7 +11,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 20150107134549) do
+ActiveRecord::Schema.define(version: 20150705173334) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -49,6 +49,13 @@
 
   add_index "tags", ["name"], name: "index_tags_on_name", unique: true, using: :btree
 
+  create_table "tool_versions", force: :cascade do |t|
+    t.integer  "tool_id"
+    t.string   "url"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+  end
+
   create_table "tools", force: :cascade do |t|
     t.string   "url"
     t.string   "doi"
diff --git a/lib/importer/repository.rb b/lib/importer/repository.rb
index 2fd3493..0426cf9 100644
--- a/lib/importer/repository.rb
+++ b/lib/importer/repository.rb
@@ -1,5 +1,7 @@
 module Importer
   class Repository
+    attr_reader :repo
+
     def self.process(text, source)
       repos = text.scan(/(?:https?\:\/\/)(#{source})\/
                         ([^),.\/]+)\/([^,\s)(\/]+)\/?([^ )]*)/x)
@@ -10,9 +12,19 @@ def self.process(text, source)
     end
 
     def initialize(repo)
-      @source = repo[0]
-      @username = repo[1].gsub(/\p{Z}/, "")
-      @repository_name = repo[2].gsub(/\.$/, "").gsub(/[\p{Z}​​]/, "")
+      @repo = repo
+    end
+
+    def username
+      repo[1].gsub(/\p{Z}/, "")
+    end
+
+    def source
+      repo[0]
+    end
+
+    def repository_name
+      repo[2].gsub(/\.$/, "").gsub(/[\p{Z}​​]/, "")
     end
 
     def process
diff --git a/lib/importer/zenodo.rb b/lib/importer/zenodo.rb
new file mode 100644
--- /dev/null
+++ b/lib/importer/zenodo.rb
@@ -0,0 +1,93 @@
+module Importer
+  # Harvests Zenodo's OAI-PMH software set and records the GitHub
+  # repositories it references as Tool and ToolVersion rows.
+  class Zenodo
+    XMLNS = { xmlns: "http://datacite.org/schema/kernel-3" }
+    # Captures the owner and repository name of a GitHub URL.
+    GITHUB_REPO = %r{github\.com/(.+?)/(.+?)(?:/|\z)}
+
+    # Long running
+    def self.import
+      new.import
+    end
+
+    # Fetches and processes one page of records per iteration.
+    def import
+      loop do
+        body = open(
+          api_url, "User-Agent" => USER_AGENT
+        ).read
+
+        @response = Nokogiri::XML(body)
+        @results = @response.css("record")
+        results = process_results
+        # A blank resumptionToken marks the final page; without this check
+        # api_url would fall back to the initial request and start over.
+        break if results.empty? || resumption_token.blank?
+        sleep 0.6
+      end
+    end
+
+    # Token for the next page, read from the most recent response.
+    def resumption_token
+      @response.css("resumptionToken").text if @response
+    end
+
+    # Runs process_result over every record of the current page.
+    def process_results
+      @results.map do |result|
+        process_result(result)
+      end
+    end
+
+    # Imports the GitHub URLs one record supplements; returns the record.
+    def process_result(result)
+      doi_css = "xmlns|identifier[identifierType='DOI']"
+      relation_css = "xmlns|relatedIdentifier[relationType='IsSupplementTo']"
+      cited_css = "xmlns|relatedIdentifier[relationType='IsCitedBy']"
+
+      # GitHub
+      result.css(relation_css, XMLNS).each do |url|
+        if url.text =~ /github\.com/
+          process_github_url(url.text)
+        else
+          doi = result.css(doi_css, XMLNS).first
+          # process_other("http://dx.doi.org/#{doi.text}")
+        end
+      end
+
+      # Citations
+      # result.css(cited_css, XMLNS).first.try(:tap) do |url|
+      # end
+
+      result
+    end
+
+    # Finds or creates the Tool for the repository in +url+ and a
+    # ToolVersion for the exact URL. Returns nil when +url+ does not
+    # contain an owner/repository path.
+    def process_github_url(url)
+      url_parts = url.match(GITHUB_REPO)
+      return unless url_parts
+
+      main_url = "https://github.com/#{url_parts[1]}/#{url_parts[2]}"
+      tool = Tool.where(url: main_url).first_or_create
+      ToolVersion.where(url: url, tool: tool).first_or_create
+    end
+
+    # Finds or creates the Tool registered under +url+.
+    def process_other(url)
+      Tool.where(url: url).first_or_create
+    end
+
+    # First-page URL, or the continuation URL once a token is known.
+    def api_url
+      base_url = "https://zenodo.org/oai2d?verb=ListRecords"
+      if resumption_token.present?
+        "#{base_url}&resumptionToken=#{resumption_token}"
+      else
+        "#{base_url}&metadataPrefix=oai_datacite3&set=software"
+      end
+    end
+  end
+end
diff --git a/spec/models/tool_version_spec.rb b/spec/models/tool_version_spec.rb
new file mode 100644
index 0000000..7416f9e
--- /dev/null
+++ b/spec/models/tool_version_spec.rb
@@ -0,0 +1,5 @@
+require "rails_helper"
+
+RSpec.describe ToolVersion, type: :model do
+  pending "add some examples to (or delete) #{__FILE__}"
+end