dimanche 25 juin 2017

Is extracting functionality into a class ok to make it testable?

I am having a difficult time testing the class below. As it stands, I would have to mock the Jsoup calls behind each of the many methods that extract a single article component (headline, lead, etc.).

I managed to solve this by extracting all the functionality related to scrape(Selector) into a separate class. The problem is that this functionality doesn't really belong in a separate class. I think this might be the "feature envy" code smell.

The class before the functionality was extracted:

/**
 * Base scraper that assembles an {@link Article} from a parsed newspaper page.
 *
 * <p>Simple text components (headline, subtitle, lead, body, author) are read
 * from the document through the {@link Selector}s supplied by the
 * {@link ArticleComponentSelectorContainer}. The remaining components
 * (sources, categories, quotations, images, subjects, sub-articles) are
 * provided by subclasses through the abstract hooks at the bottom of the class.
 *
 * <p>The document being scraped is kept in an instance field that is
 * reassigned on every call to {@link #scrape(Document, Category, int)}, so
 * overlapping calls on the same instance would read each other's document.
 */
public abstract class NewsPaperArticleScraper implements ArticleScraper {
    /** Medium stamped on every article produced by this scraper. */
    private static final Medium MEDIUM = NEWS_PAPER_WEBSITE;

    private final ArticleComponentSelectorContainer selector;
    private final MD5HashCalculator hashCalculator;
    private final Company company;

    /** Document of the scrape currently in progress; set at the start of each scrape. */
    private Document articleDocument;

    /**
     * @param selector       supplies the selector for each text component of an article
     * @param hashCalculator used to hash the concatenated headline and body
     * @param company        company attached to every scraped article
     */
    public NewsPaperArticleScraper(ArticleComponentSelectorContainer selector,
                                   MD5HashCalculator hashCalculator,
                                   Company company) {
        this.selector = selector;
        this.hashCalculator = hashCalculator;
        this.company = company;
    }

    /**
     * Builds an article from the given document.
     *
     * @param document          parsed page to scrape; may be {@code null}
     * @param category          category handed on to {@link #getCategories(Category)}
     * @param orderOfAppearance value stored as the article's order of appearance
     * @return the assembled article, or {@code null} when {@code document} is {@code null}
     */
    public Article scrape(Document document, Category category, int orderOfAppearance) {
        if (document == null) {
            return null;
        }
        articleDocument = document;

        // Headline and body are extracted up front because the hash is derived from both.
        final String headlineText = getHeadline();
        final String bodyText = getBody();
        final String contentHash = hashCalculator.hash(headlineText + bodyText);

        return article()
                .withUrl(getUrl())
                .withHash(contentHash)
                .withHeadline(headlineText)
                .withOrderOfAppearance(orderOfAppearance)
                .withSubtTitle(getSubTitle())
                .withLead(getLead())
                .withBody(bodyText)
                .withQuotations(getQuotations())
                .withAuthor(getAuthor())
                .withCompany(company)
                .withCategories(getCategories(category))
                .withSubjects(getSubjects())
                .withImages(getImages())
                .withSources(getSources())
                .withFetchDate(LocalDate.now())
                .withPublishedDate(getPublishedDate())
                .withSubArticles(getSubArticles())
                .withMedium(MEDIUM)
                .build();
    }

    /**
     * Extracts a value from the current document: element text when the
     * selector names only a tag, otherwise the value of the selected attribute.
     */
    private String scrape(Selector target) {
        return target.hasTagOnly()
                ? scrapeByTag(target.getTag())
                : scrapeByTagAndAttribute(target.getTag(), target.getAttr());
    }

    /**
     * Returns the trimmed text of all elements matching {@code tag}, separated
     * by blank lines, or an empty string when nothing matches.
     */
    private String scrapeByTag(String tag) {
        Elements matches = articleDocument.select(tag);
        // A single match joins to just its own trimmed text, so one path covers both cases.
        return matches.isEmpty() ? "" : joinAsParagraphs(matches);
    }

    /**
     * Concatenates the trimmed text of each element, putting two line
     * separators (one blank line) between consecutive elements and nothing
     * after the last one. Expects at least one element.
     */
    private String joinAsParagraphs(Elements paragraphs) {
        final String blankLine = System.lineSeparator() + System.lineSeparator();
        StringBuilder text = new StringBuilder(paragraphs.get(0).text().trim());
        for (int i = 1; i < paragraphs.size(); i++) {
            text.append(blankLine).append(paragraphs.get(i).text().trim());
        }
        return text.toString();
    }

    /** Returns the value of {@code attr} as reported by the elements matching {@code tag}. */
    private String scrapeByTagAndAttribute(String tag, String attr) {
        Elements matches = articleDocument.select(tag);
        return matches.attr(attr);
    }

    // Per-component accessors used while assembling the article in scrape(document, ...).

    /** Base URI of the document being scraped. */
    private String getUrl() {
        return articleDocument.baseUri();
    }

    private String getHeadline() {
        return scrape(selector.forHeadline());
    }

    private String getSubTitle() {
        return scrape(selector.forSubtitle());
    }

    private String getLead() {
        return scrape(selector.forLead());
    }

    private String getBody() {
        return scrape(selector.forBody());
    }

    /** Author text taken from the author selector; subclasses may override. */
    protected String getAuthor() {
        return scrape(selector.forAuthor());
    }

    /**
     * Not implemented yet: always throws. Because
     * {@link #scrape(Document, Category, int)} calls this unconditionally,
     * scraping a non-null document cannot currently complete.
     */
    private LocalDate getPublishedDate() {
        throw new NotImplementedException();
    }

    // Hooks implemented by concrete scrapers; each result is passed straight to the article builder.

    /** @return the sources to attach to the article */
    protected abstract Set<Source> getSources();

    /**
     * @param category the category that was passed to {@link #scrape(Document, Category, int)}
     * @return the categories to attach to the article
     */
    protected abstract Set<Category> getCategories(Category category);

    /** @return the quotations to attach to the article */
    protected abstract List<Quotation> getQuotations();

    /** @return the images to attach to the article */
    protected abstract List<Image> getImages();

    /** @return the subjects to attach to the article */
    protected abstract List<Subject> getSubjects();

    /** @return the sub-articles to attach to the article */
    protected abstract List<SubArticle> getSubArticles();
}

Aucun commentaire:

Enregistrer un commentaire