-
-
Notifications
You must be signed in to change notification settings - Fork 2.4k
/
Copy pathfile_scraper.rb
59 lines (47 loc) · 1.39 KB
/
file_scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
module Docs
  # Scraper whose responses are built from files on disk: every URL is mapped
  # to a path beneath #source_directory and that file's contents become the
  # response body.
  class FileScraper < Scraper
    # Root folder for source documentation, resolved relative to this file.
    SOURCE_DIRECTORY = File.expand_path('../../../../../docs', __FILE__)

    # Pairs a file's contents (:body) with its parsed URL (:url).
    Response = Struct.new(:body, :url)

    # Copies this class's base_url onto each newly created subclass.
    def self.inherited(subclass)
      super
      subclass.base_url = base_url
    end

    self.base_url = 'http://localhost/'

    html_filters.push('clean_local_urls')

    # Directory this scraper reads from: SOURCE_DIRECTORY joined with the
    # scraper class's path. Computed once per instance and memoized.
    def source_directory
      @source_directory ||= File.join(SOURCE_DIRECTORY, self.class.path)
    end

    private

    # Raises SetupError when #source_directory is not an existing directory.
    def assert_source_directory_exists
      raise SetupError, "The #{self.class.name} scraper requires the original documentation files to be stored in the \"#{source_directory}\" directory." unless Dir.exist?(source_directory)
    end

    # Builds the Response for a single url. The file is read before the url is
    # parsed; the body is nil when the file could not be read.
    def request_one(url)
      assert_source_directory_exists
      file_path = File.join(source_directory, url_to_path(url))
      contents = read_file(file_path)
      Response.new(contents, URL.parse(url))
    end

    # Works through the given url(s) in FIFO order, yielding the Response for
    # each one. When the block returns an Array, its elements are appended to
    # the queue so they are requested as well.
    def request_all(urls)
      assert_source_directory_exists
      pending = [urls].flatten
      until pending.empty?
        next_url = pending.shift
        discovered = yield(request_one(next_url))
        pending.concat(discovered) if discovered.is_a?(Array)
      end
    end

    # A response is processed only when its body is present.
    def process_response?(response)
      response.body.present?
    end

    # Removes the base url from the given url, leaving the remainder that is
    # joined onto #source_directory.
    def url_to_path(url)
      prefix = base_url.to_s
      url.remove(prefix)
    end

    # Returns the contents of the file at path. On any StandardError a
    # 'warn.doc' event is instrumented and nil is returned instead.
    def read_file(path)
      File.read(path)
    rescue StandardError
      instrument('warn.doc', msg: "Failed to open file: #{path}")
      nil
    end
  end
end