From fdec17930ed6f05d2ca4e8f032da6b4e0377e421 Mon Sep 17 00:00:00 2001
From: Jalerson Lima
Date: Wed, 12 Mar 2025 13:15:07 -0300
Subject: [PATCH 1/3] build scrapers

---
 lib/scrapers/generic.rb                       | 23 +++++
 lib/scrapers/google/gallery.rb                | 63 +++++++++++++++
 lib/scrapers/google/image.rb                  | 36 +++++++++
 lib/scrapers/google/image_replacer_script.rb  | 55 +++++++++++++
 spec/lib/scrapers/generic_spec.rb             | 43 ++++++++++
 spec/lib/scrapers/google/gallery_spec.rb      | 49 ++++++++++++
 .../google/image_replacer_script_spec.rb      | 80 +++++++++++++++++++
 spec/lib/scrapers/google/image_spec.rb        | 63 +++++++++++++++
 8 files changed, 412 insertions(+)
 create mode 100644 lib/scrapers/generic.rb
 create mode 100644 lib/scrapers/google/gallery.rb
 create mode 100644 lib/scrapers/google/image.rb
 create mode 100644 lib/scrapers/google/image_replacer_script.rb
 create mode 100644 spec/lib/scrapers/generic_spec.rb
 create mode 100644 spec/lib/scrapers/google/gallery_spec.rb
 create mode 100644 spec/lib/scrapers/google/image_replacer_script_spec.rb
 create mode 100644 spec/lib/scrapers/google/image_spec.rb

diff --git a/lib/scrapers/generic.rb b/lib/scrapers/generic.rb
new file mode 100644
index 00000000..deb4d186
--- /dev/null
+++ b/lib/scrapers/generic.rb
@@ -0,0 +1,23 @@
+module Scrapers
+  # Selects nodes with a CSS selector and hands the result of `css` to a
+  # processor callable, returning whatever the processor returns.
+  class Generic
+    attr_accessor :selector, :processor
+
+    # Default processor: the `text` of the selected node set.
+    DEFAULT_PROCESSOR_FN = ->(item) { item.text }
+
+    # @param selector [String] CSS selector passed to `html.css`
+    # @param processor [#call] receives the result of `html.css(selector)`
+    def initialize(selector:, processor: DEFAULT_PROCESSOR_FN)
+      @selector = selector
+      @processor = processor
+    end
+
+    # @param html [#css] parsed document or node to search
+    # @return [Object] the processor's return value
+    def scrape(html)
+      @processor.call(html.css(@selector))
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/scrapers/google/gallery.rb b/lib/scrapers/google/gallery.rb
new file mode 100644
index 00000000..a94b5ef7
--- /dev/null
+++ b/lib/scrapers/google/gallery.rb
@@ -0,0 +1,63 @@
+require_relative 'image'
+require_relative 'image_replacer_script'
+require_relative '../generic'
+
+module Scrapers
+  module Google
+    # Scrapes every element matched by a selector into a hash with one key
+    # per configured field scraper, and returns the hashes as an array.
+    class Gallery
+      DEFAULT_SELECTOR = 'div.iELo6'.freeze
+      BASE_URL = 'https://www.google.com'.freeze
+
+      # Builds the absolute link from the first anchor's href. Returns nil
+      # (so #scrape omits the key) when the anchor or its href is missing;
+      # the previous inline lambda raised NoMethodError/TypeError there.
+      LINK_PROCESSOR = lambda do |links|
+        anchor = links[0]
+        href = anchor && anchor.attributes['href']&.text
+        "#{BASE_URL}#{href}" if href
+      end
+
+      # NOTE: the Image scraper instance is shared by every Gallery built with
+      # the defaults; #scrape reassigns its image_map on each call.
+      DEFAULT_SCRAPERS = {
+        name: Scrapers::Generic.new(selector: 'div.pgNMRc'),
+        extensions: Scrapers::Generic.new(selector: 'div.cxzHyb', processor: ->(div) { div.text.empty? ? nil : [div.text] }),
+        link: Scrapers::Generic.new(selector: 'a', processor: LINK_PROCESSOR),
+        image: Scrapers::Google::Image.new
+      }.freeze
+      DEFAULT_SCRIPT_SCRAPER = Scrapers::Google::ImageReplacerScript
+
+      # @param parser [#HTML] `parser.HTML(input)` must return an object responding to `css`
+      # @param selector [String] CSS selector for one gallery item
+      # @param scrapers [Hash{Symbol => #scrape}] field name => scraper
+      # @param script_scraper [Class, #scrape] instantiated with `.new` when a Class is given
+      def initialize(parser:, selector: DEFAULT_SELECTOR, scrapers: DEFAULT_SCRAPERS, script_scraper: DEFAULT_SCRIPT_SCRAPER)
+        @parser = parser
+        @selector = selector
+        @scrapers = scrapers
+        @script_scraper = script_scraper.is_a?(Class) ? script_scraper.new : script_scraper
+      end
+
+      # @param input [String] raw HTML handed to the parser
+      # @return [Array<Hash>] one hash per matched item; fields whose scraper
+      #   returned nil/false are omitted
+      def scrape(input)
+        html = @parser.HTML(input)
+
+        # Scraper sets without an :image entry (or whose :image scraper has no
+        # image_map=) are supported; previously this raised NoMethodError.
+        image_scraper = @scrapers[:image]
+        image_scraper.image_map = @script_scraper.scrape(html) if image_scraper.respond_to?(:image_map=)
+
+        html.css(@selector).map do |item|
+          @scrapers.each_with_object({}) do |(key, scraper), output_item|
+            value = scraper.scrape(item)
+            output_item[key] = value if value
+          end
+        end
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/scrapers/google/image.rb b/lib/scrapers/google/image.rb
new file mode 100644
index 00000000..e3c0bd29
--- /dev/null
+++ b/lib/scrapers/google/image.rb
@@ -0,0 +1,36 @@
+module Scrapers
+  module Google
+    # Resolves the URL of the first image matching a selector: `data-src`
+    # wins, then an `image_map` lookup by element id when `src` is the
+    # placeholder, otherwise the plain `src`.
+    class Image
+      DEFAULT_SELECTOR = 'img.taFZJe'.freeze
+      # 1x1 GIF data URI; a `src` equal to it triggers the image_map lookup.
+      PLACEHOLDER = 'data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=='.freeze
+
+      attr_accessor :selector, :image_map
+
+      # @param selector [String] CSS selector for the image element
+      def initialize(selector: DEFAULT_SELECTOR)
+        @selector = selector
+        @image_map = nil # assigned later through the accessor
+      end
+
+      # @param html [#css] node or document to search
+      # @return [String, nil] image URL/data, or nil when nothing matches
+      def scrape(html)
+        image = html.css(@selector)[0]
+        return unless image
+
+        data_src = image.attributes['data-src']&.text
+        return data_src if data_src
+
+        src = image.attributes['src']&.text
+        return src unless src == PLACEHOLDER
+
+        image_id = image.attributes['id']&.text
+        image_map&.key?(image_id) ? image_map[image_id] : src
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/scrapers/google/image_replacer_script.rb b/lib/scrapers/google/image_replacer_script.rb
new file mode 100644
index 00000000..4c9f0be8
--- /dev/null
+++ b/lib/scrapers/google/image_replacer_script.rb
@@ -0,0 +1,55 @@
+module Scrapers
+  module Google
+    # Builds an image-id => image-data hash from the text of the scripts
+    # that mention the image replacer function.
+    class ImageReplacerScript
+      DEFAULT_SELECTOR = "script".freeze
+      IMAGE_REPLACER_FN = "_setImagesSrc".freeze
+      # Capture 1 is the quoted value assigned to `s`, capture 2 the quoted
+      # content of the `ii` array literal.
+      # NOTE(review): `.` does not match newlines and capture 2 spans the whole
+      # array body - confirm each script is single-line with a single id.
+      IMAGE_DATA_PATTERN = /s='(.*?)';.*?var ii=\['(.*?)'\]/
+
+      attr_accessor :selector, :image_replacer_fn
+
+      # @param selector [String] CSS selector for candidate script elements
+      # @param image_replacer_fn [String] substring a script must contain
+      def initialize(selector: DEFAULT_SELECTOR, image_replacer_fn: IMAGE_REPLACER_FN)
+        @selector = selector
+        @image_replacer_fn = image_replacer_fn
+      end
+
+      # @param html [#css, nil] parsed document
+      # @return [Hash{String => String}] empty when html is nil or nothing matches
+      def scrape(html)
+        return {} if html.nil?
+
+        scrape_image_map(scrape_image_replacer_script(html))
+      end
+
+      private
+
+      # Concatenated text of every selected script containing the function name.
+      def scrape_image_replacer_script(html)
+        html.css(@selector)
+            .map(&:text)
+            .select { |text| text.include?(@image_replacer_fn) }
+            .join
+      end
+
+      # Scans the script text for (data, id) pairs; a later pair with the same
+      # id overwrites an earlier one.
+      def scrape_image_map(script)
+        script.scan(IMAGE_DATA_PATTERN).each_with_object({}) do |(base64, image_id), image_map|
+          image_map[image_id] = sanitize(base64)
+        end
+      end
+
+      # Replaces the literal `\x3d` escape sequence with `=`.
+      def sanitize(base64)
+        base64.gsub(/\\x3d/, "=")
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/spec/lib/scrapers/generic_spec.rb b/spec/lib/scrapers/generic_spec.rb
new file mode 100644
index 00000000..ff087229
--- /dev/null
+++ b/spec/lib/scrapers/generic_spec.rb
@@ -0,0 +1,43 @@
+require 'nokolexbor'
+require 'scrapers/generic'
+
+RSpec.describe Scrapers::Generic do
+  let(:html) { Nokolexbor::HTML('
Hello World
') } + let(:selector) { '.sample' } + + describe '#initialize' do + it 'creates an instance of the scraper' do + scraper = described_class.new(selector: selector) + expect(scraper.processor).to eq(described_class::DEFAULT_PROCESSOR_FN) + end + + context 'when using a custom processor' do + it 'creates the scraper using the customer processor' do + custom_processor = ->(item) { item.text.upcase } + scraper = described_class.new(selector: selector, processor: custom_processor) + + expect(scraper.selector).to eq(selector) + expect(scraper.processor).to eq(custom_processor) + end + end + end + + describe '#scrape' do + context 'using the default processor' do + it 'scrapes the content using the selector and the default processor' do + scraper = described_class.new(selector: selector) + + expect(scraper.scrape(html)).to eq('Hello World') + end + end + + context 'using a custom processor' do + it 'scrapes the content using the custom processor' do + custom_processor = ->(item) { item.text.upcase } + scraper = described_class.new(selector: selector, processor: custom_processor) + + expect(scraper.scrape(html)).to eq('HELLO WORLD') + end + end + end +end \ No newline at end of file diff --git a/spec/lib/scrapers/google/gallery_spec.rb b/spec/lib/scrapers/google/gallery_spec.rb new file mode 100644 index 00000000..3076d1b7 --- /dev/null +++ b/spec/lib/scrapers/google/gallery_spec.rb @@ -0,0 +1,49 @@ +require 'nokolexbor' +require 'scrapers/google/gallery' +require 'json' + +RSpec.describe Scrapers::Google::Gallery do + let(:parser) { Nokolexbor } + + describe '#scrape' do + context 'Van Gogh artwork' do + let(:scraper) { described_class.new(parser: parser) } + + it 'scrapes the artwork' do + input_file_content = File.read(File.join(__dir__, '../../../../files/van-gogh-paintings.html')) + expected_file_content = File.read(File.join(__dir__, '../../../../files/expected-array.json')) + expected_artworks = JSON.parse(expected_file_content, symbolize_names: true) + + artworks = 
scraper.scrape(input_file_content) + expect({artworks: artworks}).to eq(expected_artworks) + end + end + + context 'Picasso paintings' do + let(:scraper) { described_class.new(parser: parser) } + + it 'scrapes the paintings' do + input_file_content = File.read(File.join(__dir__, '../../../fixtures/picasso-paintings.html')) + expected_file_content = File.read(File.join(__dir__, '../../../fixtures/expected-picasso-paintings.json')) + expected_artworks = JSON.parse(expected_file_content, symbolize_names: true) + + artworks = scraper.scrape(input_file_content) + + expect({artworks: artworks}).to eq(expected_artworks) + end + end + + context 'Steve McCurry photos' do + let(:scraper) { described_class.new(parser: parser) } + + it 'scrapes the paintings' do + input_file_content = File.read(File.join(__dir__, '../../../fixtures/steve-mccurry-photos.html')) + expected_file_content = File.read(File.join(__dir__, '../../../fixtures/expected-steve-mccurry-photos.json')) + expected_artworks = JSON.parse(expected_file_content, symbolize_names: true) + + photos = scraper.scrape(input_file_content) + expect({photos: photos}).to eq(expected_artworks) + end + end + end +end \ No newline at end of file diff --git a/spec/lib/scrapers/google/image_replacer_script_spec.rb b/spec/lib/scrapers/google/image_replacer_script_spec.rb new file mode 100644 index 00000000..f2166346 --- /dev/null +++ b/spec/lib/scrapers/google/image_replacer_script_spec.rb @@ -0,0 +1,80 @@ +require 'nokolexbor' +require 'scrapers/google/image_replacer_script' + +RSpec.describe Scrapers::Google::ImageReplacerScript do + let(:replacer) { described_class.new } + + describe '#initialize' do + context 'using the default values' do + it 'creates a scraper using the default values' do + expect(replacer.selector).to eq(described_class::DEFAULT_SELECTOR) + expect(replacer.image_replacer_fn).to eq(described_class::IMAGE_REPLACER_FN) + end + end + + context 'using a custom selector and func name' do + it 'creates a scraper 
using custom values' do + scraper = described_class.new(selector: 'div.sample', image_replacer_fn: 'myFunc') + + expect(scraper.selector).to eq('div.sample') + expect(scraper.image_replacer_fn).to eq('myFunc') + end + end + end + + describe '#scrape' do + context 'when html is nil' do + it 'returns empty hash' do + expect(replacer.scrape(nil)).to eq({}) + end + end + + context 'with valid html' do + let(:html) do + Nokolexbor::HTML(<<~HTML) + + + + + HTML + end + + it 'extracts image mappings' do + expected = { + 'image1' => 'base64data==', + 'image2' => 'otherdata=' + } + expect(replacer.scrape(html)).to eq(expected) + end + end + + context 'with custom function name' do + let(:replacer) { described_class.new(image_replacer_fn: '_customFn') } + let(:html) do + Nokolexbor::HTML(<<~HTML) + + + + HTML + end + + it 'scrapes the correct data' do + expect(replacer.scrape(html)).to eq({'image3' => 'testdata='}) + end + end + + context 'with no matching script' do + let(:html) { Nokolexbor::HTML('') } + + it 'returns empty hash' do + expect(replacer.scrape(html)).to eq({}) + end + end + end +end \ No newline at end of file diff --git a/spec/lib/scrapers/google/image_spec.rb b/spec/lib/scrapers/google/image_spec.rb new file mode 100644 index 00000000..921b70e7 --- /dev/null +++ b/spec/lib/scrapers/google/image_spec.rb @@ -0,0 +1,63 @@ +require 'nokolexbor' +require 'scrapers/google/image' + +RSpec.describe Scrapers::Google::Image do + let(:scraper) { described_class.new } + let(:html) { Nokolexbor::HTML(html_content) } + + describe '#initialize' do + context 'with the default selector' do + it 'creates a scraper using the default selector' do + scraper = described_class.new + + expect(scraper.selector).to eq(described_class::DEFAULT_SELECTOR) + end + end + + context 'with a custom selector' do + it 'creates a scraper using the custom selector' do + scraper = described_class.new(selector: '.custom-class') + + expect(scraper.selector).to eq('.custom-class') + end + end + end 
+ + describe '#scrape' do + context 'when no matching images found' do + let(:html_content) { '
No images here
' } + + it 'returns nil' do + expect(scraper.scrape(html)).to be_nil + end + end + + context 'with regular image src' do + let(:html_content) { '' } + + it 'returns the src url' do + expect(scraper.scrape(html)).to eq('image.jpg') + end + end + + context 'with data-src attribute' do + let(:html_content) { '' } + + it 'returns the data-src url' do + expect(scraper.scrape(html)).to eq('real-image.jpg') + end + end + + context 'with placeholder image' do + let(:html_content) { "" } + + before do + scraper.image_map = {'img1' => 'mapped-image.jpg'} + end + + it 'returns mapped image url' do + expect(scraper.scrape(html)).to eq('mapped-image.jpg') + end + end + end +end \ No newline at end of file From fd3a3f8839d43ebe8c3ff9539508057d5c22d256 Mon Sep 17 00:00:00 2001 From: Jalerson Lima Date: Wed, 12 Mar 2025 13:15:25 -0300 Subject: [PATCH 2/3] add test samples --- spec/fixtures/expected-picasso-paintings.json | 274 ++++++++++++++++++ .../expected-steve-mccurry-photos.json | 16 + spec/fixtures/picasso-paintings.html | 52 ++++ spec/fixtures/steve-mccurry-photos.html | 74 +++++ 4 files changed, 416 insertions(+) create mode 100644 spec/fixtures/expected-picasso-paintings.json create mode 100644 spec/fixtures/expected-steve-mccurry-photos.json create mode 100644 spec/fixtures/picasso-paintings.html create mode 100644 spec/fixtures/steve-mccurry-photos.html diff --git a/spec/fixtures/expected-picasso-paintings.json b/spec/fixtures/expected-picasso-paintings.json new file mode 100644 index 00000000..be87d5cc --- /dev/null +++ b/spec/fixtures/expected-picasso-paintings.json @@ -0,0 +1,274 @@ +{ + "artworks": [ + { + "name": "Guernica", + "extensions": ["1937"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Guernica+(Picasso)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiBLFMjAxzTLWUspOt9Msyi0sTc-ITi0qQmJnFJVbl-UXZxYtYhdxLU4vyMpMTFTQCgGRxcb4mAC4Q9W5QAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAD", + "image": 
"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRstwGXbSA9Fxfl601xrf76CQVHYnpmAu5OUMwGSgALL87wg9HE" + }, + { + "name": "The Old Guitarist", + "extensions": ["1904"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=The+Old+Guitarist&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiBLEsTM0KC7WUspOt9Msyi0sTc-ITi0qQmJnFJVbl-UXZxYtYBUMyUhX8c1IU3EszSxKLgDIA9X8Wz08AAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAF", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcT8z6q-0Z3e2iwfVKF8hk-ln1SEjhWyBLYTE5iDXk3ZpmneG0D_" + }, + { + "name": "Girl before a Mirror", + "extensions": ["1932"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Girl+before+a+Mirror&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGMCwurCrK1lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWEXcM4tyFJJS0_KLUhUSFXwzi4ryiwDxlblBUwAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAH", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTwVTQy_X2osffv3dXxxf_CrHZQBnT_Qm0DEHrHNTgoVo7z4KPn" + }, + { + "name": "Les Demoiselles d’Avignon", + "extensions": ["1907"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Les+Demoiselles+d%E2%80%99Avignon&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiBLFMq7JMcrSUspOt9Msyi0sTc-ITi0qQmJnFJVbl-UXZxYtYpX1SixVcUnPzM4tTc3KA7JRHDTMdyzLT8_LzAMn1z6ZZAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAJ", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRkMWuz31hIU_4X9okeqaXFLFLuGdecGi6Yo5NZPdfL4q6z6hPt" + }, + { + "name": "Dove of Peace", + "extensions": ["1949"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Dove+(Picasso)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzTMybEwiTezKNFSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9i5XPJL0tV0AjITE4sLs7XBACbteK5UAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAL", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTJtIhDtRroGh90Hb6IMsveC6ErkAYFS4_3ihJ6apWrPTtfzw4a" + }, + { + "name": "Le 
Rêve", + "extensions": ["1932"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Le+R%C3%AAve+(Picasso)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGMzEzNTMy1lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWIV8UhWCDq8qS1XQCMhMTiwuztcEAPuh_H1RAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAN", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTXDIrTVvK9sRZr9u-LiB5jFRCuaQWPfpcdKqdkBUViJIrRp1ta" + }, + { + "name": "Portrait of Dora Maar", + "extensions": ["1937"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Portrait+of+Dora+Maar&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyKqw0zzDQUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYhUNyC8qKUrMLFHIT1NwyS9KVPBNTCwCANRroJRVAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAP", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThbFVLCW1Jdprc3AwE4F1WRcsCiD0rj2E4O7DgtqSMf3ppHcu3" + }, + { + "name": "Don Quixote", + "extensions": ["1955"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Don+Quixote+(Picasso)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGSkipKLC21lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWEVd8vMUAkszK_JLUhU0AjKTE4uL8zUB0LmeTVQAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAR", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRxnZZIlUSmERgkSHeoj9GiKp1H9bmTe_ggYHcA_ZZqcBFlC6gW" + }, + { + "name": "The Kiss", + "extensions": ["1925"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=The+Kiss&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyTCkrKSnQUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYuUIyUhV8M4sLgYANBx8SkgAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAT", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQvZVQdGHKBGKbpi9-BXvySTahVyoFRT3NiGMacxUgQ_qgSFvoJ" + }, + { + "name": "Bull's Head", + "extensions": ["1942"], + "link": 
"https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Bull%27s+Head&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArHykixKcgq1lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWLmdSnNy1IsVPFITUwCwB7t6SgAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAV", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRyeScUWwAGcHyj0wfWuCvGgkrZc_OodFf6KN9_E7N8MjfYu-HE" + }, + { + "name": "Still life with the caned chair", + "extensions": ["1912"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Still+life+with+the+caned+chair&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyNo8vjrfUUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYpUPLsnMyVHIyUxLVSjPLMlQKMlIVUhOzEtNUUjOSMwsAgADu_HYXwAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAX", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQ9C9dkn1JbOAce_NRr-IbH0H_ttSywLANWPqBDbtD4Sm8G9TyG" + }, + { + "name": "Girl on the ball", + "extensions": ["1905"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Girl+on+the+ball&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyNqssMs7TUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYhVwzyzKUcjPUyjJSFVISszJAQAycOWXUAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAZ", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcSDXVgeS4hh8LBzoSZMr8A73MgwVYfvxP6x1sTIDRsWGCq0qBmj" + }, + { + "name": "Self-Portrait", + "extensions": ["1901"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Self-Portrait&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyLC5KMsrVUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYuUNTs1J0w3ILyopSswsAQCx1TH2TQAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAb", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRJaDd_xXpSpHrIJ9UJlE22PeNkWwE37x-v2WDySh8PwUyMZu1o" + }, + { + "name": "Child with a Dove", + "extensions": ["1901"], + "link": 
"https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Child+with+a+Dove&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArFKTI3ic4u0lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWAWdMzJzUhTKM0syFBIVXPLLUgEXsnGTUAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAd", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcS-wR2ZNWF-wur0HEJsHtbW1dX9kWSdyZNoABBOm0KOapIRIZoF" + }, + { + "name": "Science and Charity", + "extensions": ["1897"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Science+and+Charity&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiBrEMjSxzsouLtJSyk630yzKLSxNz4hOLSpCYmcUlVuX5RdnFi1iFg5MzU_OSUxUS81IUnDMSizJLKgEnplW7UwAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAf", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcSwwDiSjslsExXvEUj9KRJDt4qgSTzlQOpzSkJlV5uYaDTPISch" + }, + { + "name": "Garçon à la pipe", + "extensions": ["1905"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Gar%C3%A7on+%C3%A0+la+pipe&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiBLGMqrJM87SUspOt9Msyi0sTc-ITi0qQmJnFJVbl-UXZxYtYhdwTiw4vz89TOLxAISdRoSCzIBUA_dN0QlAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAh", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRjs_E-TtzMRXJBD5ykSm-wPeXVRHeNUklj3fdTZi7o9H1PqEEk" + }, + { + "name": "Two Girls Reading", + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Two+Girls+Reading&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzTMNkrKMIwvz9JSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9iFQwpz1dwzyzKKVYISk1MycxLBwCGtZyEUwAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAj", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRMyFoUvxK5f0YMUT9_tw9k27rTBxs-ATf_UHyQvi-wGgMhiAh3" + }, + { + "name": "The Tragedy", + "extensions": ["1903"], + "link": 
"https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=The+Tragedy+(Picasso)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyyCjJTivRUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYhUNyUhVCClKTE9NqVTQCMhMTiwuztcEACIAcqhVAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAl", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRRd-pzKzVoO_dluN6gBtdZkMLqxVIt7aLAIkYKS-C_4l38UB7W" + }, + { + "name": "First Communion", + "extensions": ["1896"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=First+Communion&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzRMNq7MLS8uqtJSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9i5XfLLCouUXDOz80tzcvMzwMAmyIPf1EAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAn", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcQIvN4f5GodeJQG8NitctoEZ3AUxyHrR3HNrdMfskxKakErHCG7" + }, + { + "name": "The Three Dancers", + "extensions": ["1925"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=The+Three+Dancers&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiBLHSjcsyyrWUspOt9Msyi0sTc-ITi0qQmJnFJVbl-UXZxYtYBUMyUhVCMopSUxVcEvOSU4uKATPGHuxPAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAp", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQSJ5CKSr2VkgtsGZQ-SdMcg_YIy_dShuYb-G8WKvm-hGEW5Pdm" + }, + { + "name": "Ma Jolie", + "extensions": ["1912"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Ma+Jolie+(Picasso,+New+York)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArFMzNNyksq0lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWGV8ExW88nMyUxU0AjKTE4uL83UU_FLLFSKB0poAsacAnVsAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAr", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSoD0dT6mQwr6rySogsSANW3QRLa9sV5i_CjEFR8pM7BLrOxsvo" + }, + { + "name": "Yellow picador", + "link": 
"https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Yellow+picador&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzRMMksvSCrPNdFSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9i5YtMzcnJL1coyExOTMkvAgCRZ42eUAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAt", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRo4Ksi8JwlmWtYIK2UU9QZKG9e75OeHxekMbH8-QHIsHUf9c4Y" + }, + { + "name": "Woman with flower", + "extensions": ["1932"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Woman+with+flower&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzRMNi5JK88oKdBSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9iFQzPz03MUyjPLMlQSMvJL08tAgB2tP9PUwAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAv", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ0S3M6GaUBM2IO4bvfM4tmnbPzwAMrvFE14FaU1H7OxEVQmk5e" + }, + { + "name": "Sylvette", + "extensions": ["1954"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Sylvette&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiBLHMKw2NM7SUspOt9Msyi0sTc-ITi0qQmJnFJVbl-UXZxYtYOYIrc8pSS0pSARyYpQVGAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAx", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQr0p3Q32H50eHesLH5lQG-FFvdYYdff0a_D5_5SnS4hvYs4Qrb" + }, + { + "name": "Family of Saltimbanques", + "extensions": ["1905"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Family+of+Saltimbanques&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArEyjPOK88q0lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWMXdEnMzcyoV8tMUghNzSjJzkxLzCktTiwHTTWhaVgAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhAz", + "image": "https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQAG6FYuThZlmqrccyubJJ6xXcJC7IwL1AxUeVLUP8PL4BnloBE" + }, + { + "name": "Weeping Woman with Handkerchief", + "link": 
"https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Weeping+Woman+with+Handkerchief&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzRMKi_MS0u3LNdSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9ilQ9PTS3IzEtXCM_PTcxTKM8syVDwSMxLyU4tSs7ITE0DAMuwbdVhAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhA1", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcSCfxxad3tarJeywtUZD9S-va4lwLJ7e3hvXl04gC7UrmMumXpG" + }, + { + "name": "Femme à la montre", + "extensions": ["1932"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Femme+%C3%A0+la+montre&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzQsyzIxKSjLztVSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9iFXJLzc1NVTi8QCEnUSE3P6-kKBUA3ba-lFQAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhA3", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSCmXWwM6SRjM0T9HOl0ABYVw7XjQTNNcQdUPTLdnzx4Iz5HN3g" + }, + { + "name": "Seated Woman", + "extensions": ["1927"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Seated+Woman&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGMC4sMk9O1lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWHmCUxNLUlMUwvNzE_MAKrNmGUsAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhA5", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSkIAZtGZWhDE6uM0tNN2omEfBmHnT7uVhKGuJJWy60L7g2pn3p" + }, + { + "name": "The Actor", + "extensions": ["1905"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=The+Actor+(painting)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGSzLKyjHK1lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWEVCMlIVHJNL8osUNAoSM_NKMvPSNQEewTrZUwAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhA7", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcSeHfUr9Ea7ZkxeNe4uEnv-YjVZGa67Ug7MXQh6bKJDBRik1ilH" + }, + { + "name": "Seated Harlequin", + "extensions": ["1901"], + "link": 
"https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Seated+Harlequin&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyyEpLMazSUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYhUITk0sSU1R8EgsykktLM3MAwCq9WOyUAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhA9", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSjapy9KLJo2U7k8ChjEmEtTkGuu3M8vWpuOeSDi7DSemDtmxKP" + }, + { + "name": "Le petit picador jaune", + "extensions": ["1889"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Le+petit+picador+jaune&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArHSsyurLM21lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWMV8UhUKUksySxQKMpMTU_KLFLISS_NSASgfhGNVAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhA_", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcTasF87LC2Jno-UFZiMHxSRM1vatwImKhcnDIQE4QKmcQPYlAMv" + }, + { + "name": "Girl with mandolin", + "extensions": ["1910"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Girl+with+mandolin&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGMC4uMMoq0lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWIXcM4tyFMozSzIUchPzUvJzMvMANurbQFEAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBB", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQHdCUod1LjaB-uYB5pIzN2ZORFZLtpKAejbMg1Ld9Z0fLsPlCd" + }, + { + "name": "Woman Ironing", + "extensions": ["1904"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Woman+Ironing&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzRMqkgpyS7PNdRSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9i5Q3Pz03MU_Asys_LzEsHAIIWselPAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBD", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcQGn3xxVEeKrl4jxABgYod4eOq6ZIwePogsnaFfy0wfFLBUuUh1" + }, + { + "name": "Celestina", + "extensions": ["1904"], + "link": 
"https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=la+celestina+picasso&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzRMMkvPtSwzMNdSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9iFclJVEhOzUktLsnMS1QoyExOLC7OBwAyUdfPVgAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBF", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSnIY7Jz0X5ApNwuFxCQZ7t5zxHzJU41Yl5WvlnbpA0Pw-zv4rL" + }, + { + "name": "The Charnel House", + "extensions": ["1945"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=The+Charnel+House&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGMCwsrks20lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWAVDMlIVnDMSi_JScxQ88kuLUwGhRqLAUAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBH", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRUfX9w7mrJUD169hIqwDleu1frHvKlmb_w10Lf8aXcOK7up5in" + }, + { + "name": "Bather", + "extensions": ["1909"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Bather&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGMCwuryiq0lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWNmcEksyUosAZr7G4kUAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBJ", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcTKlbtyfxrH9dY0Z2q1XyFbRUd0ZLzBJ4oC9XMKIZnoinsn5urD" + }, + { + "name": "War and Peace", + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=War+and+Peace&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyKjAvN6nSUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYuUNTyxSSMxLUQhITUxOBQBRE2CMTQAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBL", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSN_W_EJ4Yizec7F6aS_kM71dQjVLbhu5i7GU9vi1WhmUGW_KUN" + }, + { + "name": "On the Beach", + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=On+the+Beach&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyLEnKLizWUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYuXxz1MoyUhVcEpNTM4AAMbnWIJMAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBN", + 
"image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSKbJfgnUIV7Ny4awo_IslfZzJTvALlor6jgyPhDgSyRSFvsysD" + }, + { + "name": "The serenade", + "extensions": ["1942"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=The+serenade&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyqLQwKanUUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYuUJyUhVKE4tSs1LTEkFAJkcrgpMAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBP", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR0ypB_OXmyj8LnMRvOEJsyeDGF8CL6LHXsjAToX_b8dvgrCVST" + }, + { + "name": "Woman with book", + "extensions": ["1932"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Woman+with+book&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1U_XNzRMNi5JNy6vMNRSyk620i_LLC5NzIlPLCpBYmYWl1iV5xdlFy9i5Q_Pz03MUyjPLMlQSMrPzwYAyjM5BlEAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBR", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRhmMo50Q0GDDOpXP9oqYqtJ8AemZ3lrx_xXi9hw8f91f9yDJyT" + }, + { + "name": "Portrait of Ambroise Vollard", + "extensions": ["1910"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Portrait+of+Ambroise+Vollard&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyqoovqIzXUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYpUJyC8qKUrMLFHIT1NwzE0qys8sTlUIy8_JSSxKAQBpwAhuXAAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBT", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSc5Clp9PtpSl-LJgshboXPGoW8l-5gx-QjHLQZ-YNhQwfyx-vu" + }, + { + "name": "Jacqueline", + "extensions": ["1961"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Jacqueline+(painting)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArGMCooqk1O0lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWEW9EpMLS1NzMvNSFTQKEjPzSjLz0jUBqNuyblQAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBV", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSRU39-Q6TW3DgzNJpnKghKZDIEcDd49StP2vrJkq7YlE6P_TkQ" + }, + { + "name": 
"Tête de Femme", + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=T%C3%AAte+de+Femme&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyMrZITq7SUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYuULObyqJFUhJVXBLTU3NxUARv8x204AAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBX", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRbOQ7Ic4G9i3qLTpsPC80RQ4ctl52HFKaNA6CthiqeNcPigSUG" + }, + { + "name": "Still life (The dessert)", + "extensions": ["1901"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Still+life+(The+dessert)&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQyNi_LS4_XUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYpUILsnMyVHIyUxLVdAIyUhVSEktLk4tKtEEAA_tAmxYAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBZ", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ248LO5IdMOFpipW5UerH5xCHO3nhvnMdtVdX6Sh9ctwKpXbMU" + }, + { + "name": "Minotauromachy", + "extensions": ["1935"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Minotauromachy&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fiArFMzHOKivK0lLKTrfTLMotLE3PiE4tKkJiZxSVW5flF2cWLWPl8M_PySxJLi_JzE5MzKgGuASW-TQAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBb", + "image": "https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTV4t4otcQLqFYTD8zaPvzj6ovc0xLR3cVRUQnIlrAtzgO8K2Qv" + }, + { + "name": "Woman dressed in blue", + "extensions": ["1901"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Woman+dressed+in+blue&stick=H4sIAAAAAAAAAONgFuLQz9U3MDOIN1fi1k_XNzQytshNqijRUspOttIvyywuTcyJTywqQWJmFpdYlecXZRcvYhUNz89NzFNIKUotLk5NUcjMU0jKKU0FAO8ptotVAAAA&sa=X&ved=2ahUKEwjfl83q8oSMAxV1r5UCHQSnPf0Qtq8DegQIBhBd", + "image": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcShIW46paqJmCfxH5eR609tgZ1emO04GprcQCma-z6Hma49sVJG" + } + ] +} \ No newline at end of file diff --git a/spec/fixtures/expected-steve-mccurry-photos.json b/spec/fixtures/expected-steve-mccurry-photos.json new file mode 100644 index 
00000000..5ecb50c9 --- /dev/null +++ b/spec/fixtures/expected-steve-mccurry-photos.json @@ -0,0 +1,16 @@ +{ + "photos": [ + { + "name": "Afghan Girl", + "extensions": ["1984"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Afghan+Girl&stick=H4sIAAAAAAAAAONgFuLUz9U3MCs0LUhWAjONLMuq4rWUspOt9Msyi0sTc-ITi0qQmJnFJVbl-UXZxYtYuR3T0jMS8xTcM4tyAHCp4R9KAAAA&sa=X&ved=2ahUKEwjwqvDD9oSMAxU3lZUCHaRhKk4Qtq8DegQIBhAD", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcTuMonsjG1WMzRhs5-UDE0t2Ww5phvf7GjFSnfF9k9IhvQ78Fza" + }, + { + "name": "Camels under a Blackened Sky", + "extensions": ["1991"], + "link": "https://www.google.com/search?sca_esv=d856a50ff87a9623&cs=0&q=Camels+under+a+Blackened+Sky&stick=H4sIAAAAAAAAAONgFuLUz9U3MCs0LUhW4gIx44uMi0sqtJSyk630yzKLSxNz4hOLSpCYmcUlVuX5RdnFi1hlnBNzU3OKFUrzUlKLFBIVnHISk7NT81JTFIKzKwGElb2WXAAAAA&sa=X&ved=2ahUKEwjwqvDD9oSMAxU3lZUCHaRhKk4Qtq8DegQIBhAF", + "image": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRP0OFe84RbN4rBF5DtkOwiG-YJdWWIq0aExcwqsemLRfYxRd7S" + } + ] +} diff --git a/spec/fixtures/picasso-paintings.html b/spec/fixtures/picasso-paintings.html new file mode 100644 index 00000000..2898e68c --- /dev/null +++ b/spec/fixtures/picasso-paintings.html @@ -0,0 +1,52 @@ +picasso - Google Search

Accessibility links

Quick Settings
Advanced Search
About 283,000,000 results (0.35 seconds) 
Pablo Picasso
Spanish painter and sculptor
OverviewArtworksPeriods

Search Results

Page navigation

Google apps
\ No newline at end of file diff --git a/spec/fixtures/steve-mccurry-photos.html b/spec/fixtures/steve-mccurry-photos.html new file mode 100644 index 00000000..935889b1 --- /dev/null +++ b/spec/fixtures/steve-mccurry-photos.html @@ -0,0 +1,74 @@ +Steve McCurry - Google Search

Accessibility links

Quick Settings
Advanced Search
About 2,210,000 results (0.47 seconds) 
Steve McCurry
American photographer
OverviewArtworksBooksAwards

Search Results

Page navigation

Google apps
\ No newline at end of file From 0c3a4aa129e36ef5a276d721c5150f0dd504ef06 Mon Sep 17 00:00:00 2001 From: Jalerson Lima Date: Wed, 12 Mar 2025 13:15:52 -0300 Subject: [PATCH 3/3] wrap up --- .gitignore | 2 ++ .rspec | 3 +++ Gemfile | 8 ++++++++ Gemfile.lock | 36 ++++++++++++++++++++++++++++++++++++ scrape.rb | 25 +++++++++++++++++++++++++ spec/spec_helper.rb | 1 + 6 files changed, 75 insertions(+) create mode 100644 .rspec create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 scrape.rb create mode 100644 spec/spec_helper.rb diff --git a/.gitignore b/.gitignore index fb7a88e1..7d4d7444 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,5 @@ build-iPhoneSimulator/ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: .rvmrc .DS_Store + +.byebug_history \ No newline at end of file diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..5255835f --- /dev/null +++ b/.rspec @@ -0,0 +1,3 @@ +--color +--format documentation +--require spec_helper \ No newline at end of file diff --git a/Gemfile b/Gemfile new file mode 100644 index 00000000..86f08de6 --- /dev/null +++ b/Gemfile @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +# gem "rails" +gem 'nokolexbor', '~> 0.6.0' +gem 'byebug' +gem 'rspec' \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 00000000..e429c0af --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,36 @@ +GEM + remote: https://rubygems.org/ + specs: + byebug (11.1.3) + diff-lcs (1.6.0) + nokolexbor (0.6.0) + nokolexbor (0.6.0-arm64-darwin) + nokolexbor (0.6.0-x86_64-darwin) + nokolexbor (0.6.0-x86_64-linux) + rspec (3.13.0) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-core (3.13.3) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.3) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-support 
(3.13.2) + +PLATFORMS + arm64-darwin + ruby + x86_64-darwin + x86_64-linux + +DEPENDENCIES + byebug + nokolexbor (~> 0.6.0) + rspec + +BUNDLED WITH + 2.6.3 diff --git a/scrape.rb b/scrape.rb new file mode 100644 index 00000000..e9156f3d --- /dev/null +++ b/scrape.rb @@ -0,0 +1,25 @@ +# Reads an HTML file, runs Scrapers::Google::Gallery on it and prints the result as JSON under the "artworks" key. +# Usage: ruby scrape.rb [INPUT_HTML] [nokolexbor|nokogiri] +require 'nokolexbor' +require 'json' + +require_relative 'lib/scrapers/google/gallery' + +input_file = ARGV[0] || './files/van-gogh-paintings.html' +parser_name = ARGV[1] || 'nokolexbor' + +# nokogiri is not declared in the Gemfile, so it is loaded only when explicitly requested. +html_parser = + if parser_name == 'nokogiri' + require 'nokogiri' + Nokogiri + else + Nokolexbor + end + +scraper = Scrapers::Google::Gallery.new(parser: html_parser) +html = File.read(input_file) +artworks = scraper.scrape(html) + +output = { artworks: artworks } +puts output.to_json \ No newline at end of file diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..26027830 --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1 @@ +require 'byebug' \ No newline at end of file