使用 Nightwatch 框架爬取 Facebook 数据

话不多说,直接上代码;简略教程见上一篇文章。

// Facebook group-search result pages to scrape; `search` is called once per entry.
const urls = [
  'https://www.facebook.com/search/groups/?q=Vape Vapor Ottawa&epa=SERP_TAB'
  // ...add further search URLs here
];

// Accumulates one `{ url, results }` entry per scraped page. The array is only
// ever pushed onto, never rebound, so it is declared with `const`.
const successResults = [];


/**
 * Queue the Nightwatch commands that scrape one Facebook group-search page.
 *
 * The page is opened, scrolled until the end-of-results footer appears, and
 * every result row is parsed inside the browser. The outcome is pushed onto
 * the module-level `successResults` array as `{ url, results }`; if the
 * in-page parsing fails, the entry additionally carries an `error` string and
 * an empty `results` array.
 *
 * @param {object} browser - Nightwatch browser API object.
 * @param {string} url - Search-results URL to open and scrape.
 * @returns {void} Commands are only queued on the Nightwatch command chain.
 */
const search = (browser, url) => {
  browser
    .url(url)
    .waitForElementVisible('#BrowseResultsContainer', 5000, 'the page is loaded')
    .pause(1000)
    // Scrolling to the end of the results can take a long time, so the
    // injected script is allowed up to 10 minutes.
    .timeoutsAsyncScript(600000, (result) => {
      console.log(result);
    })
    .executeAsync(
      // This function is serialized and executed inside the page, so it must
      // be self-contained: it cannot close over anything from this module.
      function (pageUrl, done) {
        // Extract the fields of a single result row. Missing elements yield
        // null/undefined fields instead of throwing, so one malformed row
        // cannot abort the whole page.
        const parseRaw = (raw) => {
          const link = raw.querySelector('._4bl9 a');
          const summary = raw.querySelector('._glo');
          // Declared locally: the bare destructuring assignment used before
          // leaked `groupLine`/`descLine` as globals on the page.
          const [groupLine, descLine] = summary ? summary.innerText.split('\n') : [];

          return {
            name: link ? link.text : null,
            groupUrl: link ? link.getAttribute('href') : null,
            groupLine,
            descLine,
          };
        };

        // Scroll to the bottom once per second until the end-of-results
        // footer exists. There is no local cap; the async script timeout set
        // above is the only upper bound.
        const scrollPage = () =>
          new Promise((resolve) => {
            const timer = setInterval(() => {
              window.scrollTo(0, document.body.scrollHeight);
              if (document.getElementById('browse_end_of_results_footer')) {
                clearInterval(timer);
                resolve();
              }
            }, 1000);
          });

        scrollPage()
          .then(() => {
            const raws = Array.from(document.querySelectorAll('._401d'));
            // The last matched element is deliberately skipped, as before
            // (presumably it is not a real result row; confirm against the page).
            return raws.slice(0, -1).map(parseRaw);
          })
          .then(
            (results) => done({ url: pageUrl, results }),
            // Always signal completion; otherwise a parsing error would leave
            // the script hanging until the 10-minute timeout.
            (err) => done({ url: pageUrl, results: [], error: String(err) })
          );
      },
      [url],
      (response) => {
        successResults.push(response.value);
      }
    )
    .pause(1000);
};

module.exports = {
   before : function(browser) {
    browser.globals.waitForConditionTimeout = 5000;
  },
  'step one': function (browser) {
    browser
      .url('https://www.facebook.com/')
      .waitForElementVisible('#login_form')
      .setValue('#email', '[email protected]')
      .setValue('#pass', 'pyhwuf-8camMu-novcof')
      .click('#loginbutton')
      .waitForElementVisible('#userNav')
  },  
  'step two': function(browser) {
    // map search urls
      urls.forEach((url) => {
        search(browser, url);
      });
  },
  'step three': (browser) => {
    console.log(JSON.stringify(successResults));
    browser.end();

  }
};
发布了10 篇原创文章 · 获赞 0 · 访问量 1579

猜你喜欢

转载自blog.csdn.net/weixin_45266779/article/details/102456978