split strains

This commit is contained in:
xinglie.lkf
2019-10-10 12:31:43 +08:00
parent ffe9b9d2b5
commit abc237336c

View File

@@ -1,4 +1,4 @@
let puppeteer = require('puppeteer'); //let puppeteer = require('puppeteer');
let fs = require('fs'); let fs = require('fs');
let path = require('path'); let path = require('path');
let https = require('https'); let https = require('https');
@@ -13,7 +13,7 @@ let delays = {
delay: 1 * 1000 delay: 1 * 1000
}, },
so360: { so360: {
base: 5 * 1000, base: 3 * 1000,
delay: 1 * 1000 delay: 1 * 1000
}, },
google: { google: {
@@ -23,11 +23,11 @@ let delays = {
}; };
let fixLessNums = { let fixLessNums = {
baidu: [0, 0], baidu: [0, 100],
google: [0, 100], google: [0, 500],
bing: [0, 50],// bing: [0, 200],//
bing_en: [0, 50], bing_en: [0, 200],
so360: [0, 0] so360: [0, 200]
}; };
let bingSpecialNumbers = { let bingSpecialNumbers = {
// 12300: 1, // 12300: 1,
@@ -238,7 +238,7 @@ let rank = {
let p = folder + sep + file; let p = folder + sep + file;
let stat = fs.lstatSync(p); let stat = fs.lstatSync(p);
if (stat.isDirectory()) { if (stat.isDirectory()) {
walk(p, callback); //walk(p, callback);
} else { } else {
callback(p); callback(p);
} }
@@ -263,14 +263,18 @@ let ciTask = async () => {
return new Promise(resolve => { return new Promise(resolve => {
let readList = {}; let readList = {};
let caches = {}; let caches = {};
let cList = ['baidu', 'so360', 'bing']; let cList = ['so360', 'baidu'];
if (canVisitGoogle) { if (canVisitGoogle) {
cList.push('google'); cList.push('google', 'bing');
} }
for (let c of cList) { for (let c of cList) {
caches[c] = Object.create(null); caches[c] = Object.create(null);
} }
rank.list('../ci', f => {
if (ciReg.test(f)) {
readList[path.resolve(f)] = 1;
}
});
for (let c of cList) { for (let c of cList) {
if (fs.existsSync('./s.' + c + '.cache')) { if (fs.existsSync('./s.' + c + '.cache')) {
let d = rank.read('./s.' + c + '.cache'); let d = rank.read('./s.' + c + '.cache');
@@ -280,7 +284,6 @@ let ciTask = async () => {
} }
} }
let loadList = Object.keys(readList); let loadList = Object.keys(readList);
let singleWork = (file, type) => { let singleWork = (file, type) => {
let ranks = []; let ranks = [];
let zeros = []; let zeros = [];
@@ -525,20 +528,20 @@ let poetTask = async () => {
}); });
}; };
let runLessNumberTask = async (type = 'poet') => { let runLessNumberTask = async (cat = 'poet') => {
let caches = Object.create(null); let caches = Object.create(null);
let writeCtrl = 0; let writeCtrl = 0;
return new Promise(resolve => { return new Promise(resolve => {
let cList = ['baidu', 'bing'];//'bing','baidu', 'so360', let cList = ['so360'];//'bing','baidu', 'so360',
if (canVisitGoogle) { if (canVisitGoogle) {
//cList.push('google'); cList.push('google', 'baidu', 'bing');
} }
for (let c of cList) { for (let c of cList) {
caches[c] = Object.create(null); caches[c] = Object.create(null);
} }
let taskList = {}; let taskList = {};
let taskIndex = {}; let taskIndex = {};
rank.list(`./${type}_temp`, f => { rank.list(`./${cat}_temp`, f => {
for (let c of cList) { for (let c of cList) {
if (f.includes(`.${c}.`)) { if (f.includes(`.${c}.`)) {
if (!taskList[c]) { if (!taskList[c]) {
@@ -578,7 +581,8 @@ let runLessNumberTask = async (type = 'poet') => {
writeCtrl++; writeCtrl++;
oldCount++; oldCount++;
//console.log(type, 'checked zero at', start); //console.log(type, 'checked zero at', start);
let kd = encodeURIComponent(`${r.author} ${r.title}`); let title = cat == 'ci' ? r.rhythmic : r.title;
let kd = encodeURIComponent(`${r.author} ${title}`);
let data, let data,
delay = 0, delay = 0,
old = r[type], old = r[type],
@@ -588,7 +592,7 @@ let runLessNumberTask = async (type = 'poet') => {
} else { } else {
data = await rank.remote(kd, type); data = await rank.remote(kd, type);
data.author = r.author; data.author = r.author;
if (type == 'poet') { if (cat == 'poet') {
data.title = r.title; data.title = r.title;
} else { } else {
data.rhythmic = r.rhythmic; data.rhythmic = r.rhythmic;
@@ -666,6 +670,7 @@ let runLessNumberTask = async (type = 'poet') => {
} else { } else {
finised[type] = true; finised[type] = true;
rank.write('./s.' + type + '.index.cache', [taskIndex[type][0], 0]); rank.write('./s.' + type + '.index.cache', [taskIndex[type][0], 0]);
console.log(`${type} finished`);
check(); check();
} }
}; };
@@ -824,7 +829,28 @@ let outputBingNumbers = () => {
result = result.sort((b, a) => a.count - b.count); result = result.sort((b, a) => a.count - b.count);
console.log(result); console.log(result);
}; };
/**
 * Moves the `strains` field of every entry out of the poem JSON files.
 *
 * For each path reported by `rank.list('../json', ...)` that `poetReg`
 * matches, the file is parsed as a JSON array. The `strains` value of every
 * entry is collected (in entry order) and removed from the entry. Two files
 * are then written through `rank.write`, both named after the source file's
 * base name:
 *   - `../json/<file>`          the entries without `strains`
 *   - `../strains/json/<file>`  the collected `strains` values
 *
 * @throws {Error} when an entry has no (truthy) `strains` value. The message
 *   names the file and includes the serialized entry. Nothing is written for
 *   that file, but files processed earlier in the listing have already been
 *   written.
 */
let splitStrains = () => {
    rank.list('../json', f => {
        if (!poetReg.test(f)) {
            return;
        }
        // Only the base name is reused for both outputs.
        // NOTE(review): this overwrites the source file only when `f` sits
        // directly inside ../json - confirm rank.list does not recurse.
        let file = path.basename(f);
        let list = JSON.parse(rank.read(f));
        let aims = [];
        for (let e of list) {
            if (!e.strains) {
                // Interpolating the object itself would print "[object Object]",
                // so serialize it to make the offending record identifiable.
                throw new Error(`strains does not exist in ${file}: ${JSON.stringify(e)}`);
            }
            aims.push(e.strains);
            delete e.strains;
        }
        rank.write(`../json/${file}`, JSON.stringify(list, null, 4));
        rank.write(`../strains/json/${file}`, JSON.stringify(aims, null, 4));
    });
};
(async () => { (async () => {
splitStrains();
//mergeCi();
//merge(); //merge();
//await ciTask(); //await ciTask();
//mergeCi(); //mergeCi();
@@ -833,9 +859,11 @@ let outputBingNumbers = () => {
//console.log('check google.com'); //console.log('check google.com');
//await checkGoogle(); //await checkGoogle();
//console.log('google.com', canVisitGoogle); //console.log('google.com', canVisitGoogle);
//await runLessNumberTask('poet'); //await ciTask();
//await Promise.all([ciTask(), runLessNumberTask('ci')]);
//await runLessNumberTask('ci');
//await runLessNumberTask(); //await runLessNumberTask();
//headless.after(); //headless.after();
//mergePoet(); //mergePoet();
mergePoet(); //mergePoet();
})(); })();