@iryu54/scraper
Scrape a website using a configuration
Last updated a year ago by iryu54 .
MIT · Original npm · Tarball · package.json
$ cnpm install @iryu54/scraper 
SYNC missed versions from official npm registry.

Scraper

Scrape a website using a configuration

Usage

npm i @iryu54/scraper

Crawl page

const Scraper = require('@iryu54/scraper')
const html = await Scraper.fetch('<URL>')

Scrape with a config

// index.js
const config = require('../test.conf')
const Scraper = require('@iryu54/scraper')
const parsedSite = await Scraper.scrapUrl('<URL>', config)
// test.conf.js
// Example scraper configuration: an array of rules. Each rule pairs a "title"
// (the name of the output field) with a "value" (a CSS selector), plus options
// that say where the value is read from ("text", "attr") and how it is
// post-processed ("transform", "html", "children").
module.exports = [
  { // Find .main-title and store its value in the "name" field. The value is taken from the element's inner text.
    "title": "name",
    "value": ".main-title",
    "text": true
  },
  { // Find .wrapper img and store its value in the "img" field. The value is taken from the "src" attribute.
    "title": "img",
    "value": ".wrapper img",
    "attr": "src"
  },
  { // Find .count input and store its value in the "nbPerson" field. The value is taken from the "value" attribute, then converted to a number.
    "title": "nbPerson",
    "value": ".count input",
    "attr": "value",
    // Unary plus coerces the attribute string to a number.
    "transform": value => +value
  },
  { // Find .list and store the result in the "preparation" field, using the "html" callback below to build the final markup.
    "title": "preparation",
    "value": ".list",
    "text": true,
    // NOTE(review): presumably called with the Cheerio selection matched by
    // "value" (it must support .toArray()) — confirm against the library.
    // NOTE(review): `cheerio` is not imported anywhere in this snippet; it must
    // be in scope (e.g. required at the top of test.conf.js) for this to run.
    html: value => {
      return value.toArray()
        .map(($preparation, i) => {
          // Re-load the matched element as its own document so it can be queried in isolation.
          $preparation = cheerio.load(cheerio.html($preparation))
          // Drop the element's own <h3> headings; a numbered step heading is emitted instead.
          $preparation('h3').remove()
          return `<h3>Etape ${i + 1}</h3>\n<p>${$preparation('.recipe-item').text().trim()}</p>`
        }).join('\n')
    }
  },
  { // Apply the "children" rules inside each .list element and store the results as an array in the "ingredients" field.
    "title": "ingredients",
    "value": ".list",
    "children": [
      {
        "title": "quantity",
        "value": ".recipe-ingredient-qt",
        "text": true
      },
      {
        "title": "name",
        "value": ".ingredient",
        "text": true
      }
    ]
  }
]

Current Tags

  • 1.0.0                                ...           latest (a year ago)

1 Versions

  • 1.0.0                                ...           a year ago
Maintainers (1)
Downloads
Today 0
This Week 0
This Month 0
Last Day 0
Last Week 1
Last Month 1
Dependencies (6)
Dev Dependencies (4)
Dependents (1)

Copyright 2014 - 2016 © taobao.org |