Skip to content

Commit

Permalink
feat: html 크롤링 방식과 naver api 방식을 조합하는 Extractor 구현
Browse files Browse the repository at this point in the history
  • Loading branch information
fromitive committed Aug 16, 2024
1 parent 4192154 commit fdbe1e2
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
package com.zzang.chongdae.offering.config;

import com.zzang.chongdae.offering.service.CombinationProductImageExtractor;
import com.zzang.chongdae.offering.service.NaverApiProductImageExtractor;
import com.zzang.chongdae.offering.service.OgTagProductImageExtractor;
import com.zzang.chongdae.offering.service.ProductImageExtractor;
import com.zzang.chongdae.offering.util.httpclient.crawler.HtmlCrawler;
import com.zzang.chongdae.offering.util.httpclient.crawler.JsoupHtmlCrawler;
import com.zzang.chongdae.offering.util.httpclient.naver.NaverScrapClient;
import java.util.List;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

Expand All @@ -16,8 +21,17 @@ public NaverScrapClient naverScrapClient() {
return new NaverScrapClient(CRAWLER_TIMEOUT_MILLISECONDS);
}

/**
 * Exposes the {@link HtmlCrawler} bean.
 *
 * @return a {@link JsoupHtmlCrawler} constructed with {@code CRAWLER_TIMEOUT_MILLISECONDS},
 *         the same timeout constant passed to the Naver scrap client in this configuration
 */
@Bean
public HtmlCrawler htmlCrawler() {
    final HtmlCrawler crawler = new JsoupHtmlCrawler(CRAWLER_TIMEOUT_MILLISECONDS);
    return crawler;
}

/**
 * Exposes the {@link ProductImageExtractor} bean as a combination of two extractors.
 *
 * <p>The delegates are registered in a fixed order: first an
 * {@link OgTagProductImageExtractor} built on {@link #htmlCrawler()}, then a
 * {@link NaverApiProductImageExtractor} built on {@link #naverScrapClient()}.
 * The list is handed to {@link CombinationProductImageExtractor}, so the order
 * chosen here is the order in which the delegates are passed on.
 *
 * @return a {@link CombinationProductImageExtractor} wrapping both delegates
 */
@Bean
public ProductImageExtractor productImageExtractor() {
    // The previous single-extractor return statement was removed: it made the
    // combination wiring below unreachable.
    List<ProductImageExtractor> extractors = List.of(
            new OgTagProductImageExtractor(htmlCrawler()),
            new NaverApiProductImageExtractor(naverScrapClient())
    );
    return new CombinationProductImageExtractor(extractors);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.zzang.chongdae.offering.service;

import java.util.List;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Component;

/**
 * {@link ProductImageExtractor} that delegates to a list of other extractors and
 * returns the first usable result.
 *
 * <p>Delegates are consulted in list iteration order. A delegate's result counts
 * as usable when it is neither {@code null} nor empty.
 *
 * <p>NOTE(review): this class is annotated with {@code @Component} and also
 * implements the interface it collects. Confirm that component scanning does not
 * register a second {@code ProductImageExtractor} bean next to one that is built
 * explicitly in a configuration class.
 */
@RequiredArgsConstructor
@Component
public class CombinationProductImageExtractor implements ProductImageExtractor {

    /** Value returned when no delegate produced a usable image URL. */
    private static final String NO_IMAGE_URL = "";

    /** Delegates to try, in order. */
    private final List<ProductImageExtractor> extractors;

    /**
     * Asks each delegate in turn for an image URL and returns the first usable one.
     *
     * @param productUrl the product page URL, passed unchanged to every delegate
     * @return the first non-null, non-empty result, or an empty string when every
     *         delegate fails (or the delegate list is empty)
     */
    @Override
    public String extract(String productUrl) {
        for (ProductImageExtractor extractor : extractors) {
            String imageUrl = extractor.extract(productUrl);
            if (isExtractSuccess(imageUrl)) {
                return imageUrl;
            }
        }
        return NO_IMAGE_URL;
    }

    /**
     * Tells whether a delegate produced a usable image URL.
     *
     * @param imageUrl the value returned by a delegate; may be {@code null}
     * @return {@code true} when the value is non-null and non-empty
     */
    private boolean isExtractSuccess(String imageUrl) {
        // A null result is treated as a failed extraction so the next delegate is
        // still tried instead of aborting the chain with a NullPointerException.
        return imageUrl != null && !imageUrl.isEmpty();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import com.zzang.chongdae.offering.util.httpclient.crawler.HtmlCrawler;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Component;

@RequiredArgsConstructor
@Component
public class OgTagProductImageExtractor implements ProductImageExtractor {

public static final String OG_IMAGE_PROPERTY = "og:image";
Expand Down

0 comments on commit fdbe1e2

Please sign in to comment.