mirror of
https://kkgithub.com/chinese-poetry/chinese-poetry.git
synced 2026-04-17 01:33:41 +00:00
split strains
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
let puppeteer = require('puppeteer');
|
//let puppeteer = require('puppeteer');
|
||||||
let fs = require('fs');
|
let fs = require('fs');
|
||||||
let path = require('path');
|
let path = require('path');
|
||||||
let https = require('https');
|
let https = require('https');
|
||||||
@@ -13,7 +13,7 @@ let delays = {
|
|||||||
delay: 1 * 1000
|
delay: 1 * 1000
|
||||||
},
|
},
|
||||||
so360: {
|
so360: {
|
||||||
base: 5 * 1000,
|
base: 3 * 1000,
|
||||||
delay: 1 * 1000
|
delay: 1 * 1000
|
||||||
},
|
},
|
||||||
google: {
|
google: {
|
||||||
@@ -23,11 +23,11 @@ let delays = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let fixLessNums = {
|
let fixLessNums = {
|
||||||
baidu: [0, 0],
|
baidu: [0, 100],
|
||||||
google: [0, 100],
|
google: [0, 500],
|
||||||
bing: [0, 50],//
|
bing: [0, 200],//
|
||||||
bing_en: [0, 50],
|
bing_en: [0, 200],
|
||||||
so360: [0, 0]
|
so360: [0, 200]
|
||||||
};
|
};
|
||||||
let bingSpecialNumbers = {
|
let bingSpecialNumbers = {
|
||||||
// 12300: 1,
|
// 12300: 1,
|
||||||
@@ -238,7 +238,7 @@ let rank = {
|
|||||||
let p = folder + sep + file;
|
let p = folder + sep + file;
|
||||||
let stat = fs.lstatSync(p);
|
let stat = fs.lstatSync(p);
|
||||||
if (stat.isDirectory()) {
|
if (stat.isDirectory()) {
|
||||||
walk(p, callback);
|
//walk(p, callback);
|
||||||
} else {
|
} else {
|
||||||
callback(p);
|
callback(p);
|
||||||
}
|
}
|
||||||
@@ -263,14 +263,18 @@ let ciTask = async () => {
|
|||||||
return new Promise(resolve => {
|
return new Promise(resolve => {
|
||||||
let readList = {};
|
let readList = {};
|
||||||
let caches = {};
|
let caches = {};
|
||||||
let cList = ['baidu', 'so360', 'bing'];
|
let cList = ['so360', 'baidu'];
|
||||||
if (canVisitGoogle) {
|
if (canVisitGoogle) {
|
||||||
cList.push('google');
|
cList.push('google', 'bing');
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let c of cList) {
|
for (let c of cList) {
|
||||||
caches[c] = Object.create(null);
|
caches[c] = Object.create(null);
|
||||||
}
|
}
|
||||||
|
rank.list('../ci', f => {
|
||||||
|
if (ciReg.test(f)) {
|
||||||
|
readList[path.resolve(f)] = 1;
|
||||||
|
}
|
||||||
|
});
|
||||||
for (let c of cList) {
|
for (let c of cList) {
|
||||||
if (fs.existsSync('./s.' + c + '.cache')) {
|
if (fs.existsSync('./s.' + c + '.cache')) {
|
||||||
let d = rank.read('./s.' + c + '.cache');
|
let d = rank.read('./s.' + c + '.cache');
|
||||||
@@ -280,7 +284,6 @@ let ciTask = async () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
let loadList = Object.keys(readList);
|
let loadList = Object.keys(readList);
|
||||||
|
|
||||||
let singleWork = (file, type) => {
|
let singleWork = (file, type) => {
|
||||||
let ranks = [];
|
let ranks = [];
|
||||||
let zeros = [];
|
let zeros = [];
|
||||||
@@ -525,20 +528,20 @@ let poetTask = async () => {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
let runLessNumberTask = async (type = 'poet') => {
|
let runLessNumberTask = async (cat = 'poet') => {
|
||||||
let caches = Object.create(null);
|
let caches = Object.create(null);
|
||||||
let writeCtrl = 0;
|
let writeCtrl = 0;
|
||||||
return new Promise(resolve => {
|
return new Promise(resolve => {
|
||||||
let cList = ['baidu', 'bing'];//'bing','baidu', 'so360',
|
let cList = ['so360'];//'bing','baidu', 'so360',
|
||||||
if (canVisitGoogle) {
|
if (canVisitGoogle) {
|
||||||
//cList.push('google');
|
cList.push('google', 'baidu', 'bing');
|
||||||
}
|
}
|
||||||
for (let c of cList) {
|
for (let c of cList) {
|
||||||
caches[c] = Object.create(null);
|
caches[c] = Object.create(null);
|
||||||
}
|
}
|
||||||
let taskList = {};
|
let taskList = {};
|
||||||
let taskIndex = {};
|
let taskIndex = {};
|
||||||
rank.list(`./${type}_temp`, f => {
|
rank.list(`./${cat}_temp`, f => {
|
||||||
for (let c of cList) {
|
for (let c of cList) {
|
||||||
if (f.includes(`.${c}.`)) {
|
if (f.includes(`.${c}.`)) {
|
||||||
if (!taskList[c]) {
|
if (!taskList[c]) {
|
||||||
@@ -578,7 +581,8 @@ let runLessNumberTask = async (type = 'poet') => {
|
|||||||
writeCtrl++;
|
writeCtrl++;
|
||||||
oldCount++;
|
oldCount++;
|
||||||
//console.log(type, 'checked zero at', start);
|
//console.log(type, 'checked zero at', start);
|
||||||
let kd = encodeURIComponent(`${r.author} ${r.title}`);
|
let title = cat == 'ci' ? r.rhythmic : r.title;
|
||||||
|
let kd = encodeURIComponent(`${r.author} ${title}`);
|
||||||
let data,
|
let data,
|
||||||
delay = 0,
|
delay = 0,
|
||||||
old = r[type],
|
old = r[type],
|
||||||
@@ -588,7 +592,7 @@ let runLessNumberTask = async (type = 'poet') => {
|
|||||||
} else {
|
} else {
|
||||||
data = await rank.remote(kd, type);
|
data = await rank.remote(kd, type);
|
||||||
data.author = r.author;
|
data.author = r.author;
|
||||||
if (type == 'poet') {
|
if (cat == 'poet') {
|
||||||
data.title = r.title;
|
data.title = r.title;
|
||||||
} else {
|
} else {
|
||||||
data.rhythmic = r.rhythmic;
|
data.rhythmic = r.rhythmic;
|
||||||
@@ -666,6 +670,7 @@ let runLessNumberTask = async (type = 'poet') => {
|
|||||||
} else {
|
} else {
|
||||||
finised[type] = true;
|
finised[type] = true;
|
||||||
rank.write('./s.' + type + '.index.cache', [taskIndex[type][0], 0]);
|
rank.write('./s.' + type + '.index.cache', [taskIndex[type][0], 0]);
|
||||||
|
console.log(`${type} finished`);
|
||||||
check();
|
check();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -824,7 +829,28 @@ let outputBingNumbers = () => {
|
|||||||
result = result.sort((b, a) => a.count - b.count);
|
result = result.sort((b, a) => a.count - b.count);
|
||||||
console.log(result);
|
console.log(result);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Moves the `strains` field of every entry out of the poem files under
 * `../json` and into parallel files under `../strains/json`.
 *
 * For each path that `rank.list('../json', ...)` reports and that matches
 * `poetReg`, the file content is parsed as a JSON list. Each entry's `strains`
 * value is collected in list order and removed from the entry. Two files named
 * after the source file's basename are then written:
 *   - `../strains/json/<basename>`: the collected `strains` values;
 *   - `../json/<basename>`: the list with `strains` removed.
 *
 * NOTE(review): assumes `rank.read` returns the raw file text and `rank.write`
 * accepts a string, as the JSON.parse / JSON.stringify calls here imply.
 *
 * @throws {Error} If an entry of a matching file has a falsy `strains` value.
 *     Nothing is written for that file, but files processed before it have
 *     already been rewritten.
 */
let splitStrains = () => {
    rank.list('../json', f => {
        if (!poetReg.test(f)) {
            return;
        }
        let file = path.basename(f);
        let list = JSON.parse(rank.read(f));
        let aims = [];
        let index = 0;
        for (let e of list) {
            if (!e.strains) {
                // Name the file and position and serialize the entry;
                // interpolating the object directly would only print
                // "[object Object]".
                throw new Error(`strains does not exist in ${f} at index ${index}: ${JSON.stringify(e)}`);
            }
            aims.push(e.strains);
            delete e.strains;
            index++;
        }
        // Persist the extracted strains first: if this write fails, the source
        // file on disk still holds the data and nothing has been lost.
        rank.write(`../strains/json/${file}`, JSON.stringify(aims, null, 4));
        rank.write(`../json/${file}`, JSON.stringify(list, null, 4));
    });
};
|
||||||
(async () => {
|
(async () => {
|
||||||
|
splitStrains();
|
||||||
|
//mergeCi();
|
||||||
//merge();
|
//merge();
|
||||||
//await ciTask();
|
//await ciTask();
|
||||||
//mergeCi();
|
//mergeCi();
|
||||||
@@ -833,9 +859,11 @@ let outputBingNumbers = () => {
|
|||||||
//console.log('check google.com');
|
//console.log('check google.com');
|
||||||
//await checkGoogle();
|
//await checkGoogle();
|
||||||
//console.log('google.com', canVisitGoogle);
|
//console.log('google.com', canVisitGoogle);
|
||||||
//await runLessNumberTask('poet');
|
//await ciTask();
|
||||||
|
//await Promise.all([ciTask(), runLessNumberTask('ci')]);
|
||||||
|
//await runLessNumberTask('ci');
|
||||||
//await runLessNumberTask();
|
//await runLessNumberTask();
|
||||||
//headless.after();
|
//headless.after();
|
||||||
//mergePoet();
|
//mergePoet();
|
||||||
mergePoet();
|
//mergePoet();
|
||||||
})();
|
})();
|
||||||
Reference in New Issue
Block a user