Skip to content

Refactor and implement a template method pattern for logger config in webmagic-scripts #1158

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions webmagic-scripts/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@
<artifactId>webmagic-extension</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.32</version>
<scope>provided</scope>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package us.codecraft.webmagic.scripts;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import lombok.Getter;
import lombok.Setter;
import us.codecraft.webmagic.scripts.languages.JRuby;
import us.codecraft.webmagic.scripts.languages.Javascript;
import us.codecraft.webmagic.scripts.languages.Language;
import us.codecraft.webmagic.utils.WMCollections;

/**
 * Holder for the settings read from the script console command line.
 *
 * <p>Accessors are generated by Lombok from the {@code @Getter}/{@code @Setter}
 * annotations on the fields. The language has no generated setter; it is
 * selected through {@link #setLanguagefromArg(String)}.
 */
public class Params {

    /**
     * Lookup table from a language to the command-line spellings that select it.
     *
     * <p>Built exactly once when the class is loaded. It used to be re-created
     * inside the instance constructor, which meant every {@code new Params()}
     * overwrote state shared by all instances (and did so without any
     * synchronization). The table is never modified after the static
     * initializer has run.
     */
    private static final Map<Language, Set<String>> ALIASES = new HashMap<Language, Set<String>>();

    static {
        ALIASES.put(new Javascript(), WMCollections.<String>newHashSet("js", "javascript", "JavaScript", "JS"));
        ALIASES.put(new JRuby(), WMCollections.<String>newHashSet("ruby", "jruby", "Ruby", "JRuby"));
    }

    /** Selected script language; a {@link Javascript} instance unless changed via an alias. */
    @Getter
    Language language = new Javascript();

    /** Name of the script file, as supplied by the caller. */
    @Getter @Setter
    String scriptFileName;

    /** URLs supplied by the caller; {@code null} until set. */
    @Getter @Setter
    List<String> urls;

    /** Defaults to 1. */
    @Getter @Setter
    int thread = 1;

    /** Defaults to 1000. */
    @Getter @Setter
    int sleepTime = 1000;

    /** Creates a parameter set holding the default values declared on the fields. */
    public Params() {
        // Nothing to do: the alias table is initialized statically.
    }

    /**
     * Selects the language whose alias set contains {@code arg}.
     *
     * <p>Matching is exact (case-sensitive) against the registered spellings.
     * If no alias matches, the current language is left unchanged.
     * NOTE(review): the {@link Language} instances stored in the alias table are
     * shared by every {@code Params}; this assumes implementations are stateless,
     * as the shared default in {@code ScriptProcessorBuilder} suggests - confirm.
     *
     * @param arg the language name given on the command line
     */
    public void setLanguagefromArg(String arg) {
        for (Map.Entry<Language, Set<String>> languageSetEntry : ALIASES.entrySet()) {
            if (languageSetEntry.getValue().contains(arg)) {
                this.language = languageSetEntry.getKey();
                return;
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,90 +1,21 @@
package us.codecraft.webmagic.scripts;

import org.apache.commons.cli.*;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.Logger;
import org.slf4j.LoggerFactory;

import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.scripts.config.CommandLineOption;
import us.codecraft.webmagic.utils.WMCollections;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
* @author [email protected]
* @author [email protected] / FrancoisGib
* @since 0.4.1
*/
public class ScriptConsole {

/**
 * Mutable bag of command-line settings used by the console, together with
 * the table of accepted spellings for each supported script language.
 */
private static class Params {
    Language language = Language.JavaScript;
    String scriptFileName;
    List<String> urls;
    int thread = 1;
    int sleepTime = 1000;
    private static Map<Language, Set<String>> alias = new HashMap<Language, Set<String>>();

    static {
        // Register the accepted command-line names for each language.
        Set<String> javascriptNames = WMCollections.<String>newHashSet("js", "javascript", "JavaScript", "JS");
        Set<String> jrubyNames = WMCollections.<String>newHashSet("ruby", "jruby", "Ruby", "JRuby");
        alias.put(Language.JavaScript, javascriptNames);
        alias.put(Language.JRuby, jrubyNames);
    }

    /**
     * Switches to the language registered under the given name; an
     * unrecognized name leaves the current language as it is.
     */
    public void setLanguagefromArg(String arg) {
        for (Map.Entry<Language, Set<String>> candidate : alias.entrySet()) {
            Set<String> names = candidate.getValue();
            if (!names.contains(arg)) {
                continue;
            }
            this.language = candidate.getKey();
            return;
        }
    }

    private Language getLanguage() {
        return this.language;
    }

    private void setLanguage(Language language) {
        this.language = language;
    }

    private String getScriptFileName() {
        return this.scriptFileName;
    }

    private void setScriptFileName(String scriptFileName) {
        this.scriptFileName = scriptFileName;
    }

    private List<String> getUrls() {
        return this.urls;
    }

    private void setUrls(List<String> urls) {
        this.urls = urls;
    }

    private int getThread() {
        return this.thread;
    }

    private void setThread(int thread) {
        this.thread = thread;
    }

    private int getSleepTime() {
        return this.sleepTime;
    }

    private void setSleepTime(int sleepTime) {
        this.sleepTime = sleepTime;
    }
}

public static void main(String[] args) {
Params params = parseCommand(args);
startSpider(params);
Expand Down Expand Up @@ -142,45 +73,9 @@ private static void exit() {

/**
 * Builds a {@link Params} from an already-parsed command line.
 *
 * NOTE(review): this span contains both explicit per-flag handling and the
 * CommandLineOption loop at the end; presumably these are the before/after
 * lines of one change shown together - confirm which variant is current
 * before relying on it, since together they would process options twice.
 *
 * @param commandLine the parsed command line to read options from
 * @return the populated parameters
 */
private static Params readOptions(CommandLine commandLine) {
Params params = new Params();
// "l": choose the script language by one of its registered aliases.
if (commandLine.hasOption("l")) {
String language = commandLine.getOptionValue("l");
params.setLanguagefromArg(language);
}
// "f": script file name. This is the only option whose absence triggers exit().
// exit() is defined outside this span; presumably it terminates the run - confirm.
if (commandLine.hasOption("f")) {
String scriptFilename = commandLine.getOptionValue("f");
params.setScriptFileName(scriptFilename);
} else {
exit();
}
// "s": sleep time; Integer.parseInt throws NumberFormatException for a non-numeric value.
if (commandLine.hasOption("s")) {
Integer sleepTime = Integer.parseInt(commandLine.getOptionValue("s"));
params.setSleepTime(sleepTime);
}
// "t": thread count; same parsing caveat as "s".
if (commandLine.hasOption("t")) {
Integer thread = Integer.parseInt(commandLine.getOptionValue("t"));
params.setThread(thread);
}
// "g": logging level, applied through configLogger.
if (commandLine.hasOption("g")) {
configLogger(commandLine.getOptionValue("g"));
}
// The argument list returned by the parser is stored as the URLs.
params.setUrls(commandLine.getArgList());
// Let every registered CommandLineOption apply itself to params if it is present.
List<CommandLineOption> options = CommandLineOption.getAllOptions();
for (CommandLineOption option : options)
option.addParamOptionIfInCommandLine(params, commandLine);
return params;
}

/**
 * Sets the root logger's level from a textual level name.
 *
 * <p>The name is compared case-insensitively against the supported names;
 * a {@code null} or unrecognized value leaves the level untouched.
 *
 * @param value the requested level name
 */
private static void configLogger(String value) {
    Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
    // Parallel tables: names[i] selects levels[i].
    String[] names = {"debug", "info", "warn", "trace", "off", "error"};
    Level[] levels = {Level.DEBUG, Level.INFO, Level.WARN, Level.TRACE, Level.OFF, Level.ERROR};
    for (int i = 0; i < names.length; i++) {
        if (names[i].equalsIgnoreCase(value)) {
            rootLogger.setLevel(levels[i]);
            return;
        }
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

import us.codecraft.webmagic.scripts.languages.Language;

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;

Expand All @@ -11,14 +14,11 @@
*/
public class ScriptEnginePool {

private final int size;

private final AtomicInteger availableCount;

private final LinkedBlockingQueue<ScriptEngine> scriptEngines = new LinkedBlockingQueue<ScriptEngine>();

public ScriptEnginePool(Language language,int size) {
this.size = size;
this.availableCount = new AtomicInteger(size);
for (int i=0;i<size;i++){
ScriptEngineManager manager = new ScriptEngineManager();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Map;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptException;
import org.apache.commons.io.IOUtils;
import org.jruby.RubyHash;
import org.python.core.PyDictionary;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scripts.languages.Language;

/**
* @author [email protected]
Expand Down Expand Up @@ -55,35 +52,7 @@ public void process(Page page) {
context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
context.setAttribute("config", site, ScriptContext.ENGINE_SCOPE);
try {
switch (language) {
case JavaScript:
engine.eval(defines + "\n" + script, context);
// NativeObject o = (NativeObject) engine.get("result");
// if (o != null) {
// for (Object o1 : o.getIds()) {
// String key = String.valueOf(o1);
// page.getResultItems().put(key, NativeObject.getProperty(o, key));
// }
// }
break;
case JRuby:
RubyHash oRuby = (RubyHash) engine.eval(defines + "\n" + script, context);
Iterator itruby = oRuby.entrySet().iterator();
while (itruby.hasNext()) {
Map.Entry pairs = (Map.Entry) itruby.next();
page.getResultItems().put(pairs.getKey().toString(), pairs.getValue());
}
break;
case Jython:
engine.eval(defines + "\n" + script, context);
PyDictionary oJython = (PyDictionary) engine.get("result");
Iterator it = oJython.entrySet().iterator();
while (it.hasNext()) {
Map.Entry pairs = (Map.Entry) it.next();
page.getResultItems().put(pairs.getKey().toString(), pairs.getValue());
}
break;
}
this.language.process(engine, defines, script, page);
} catch (ScriptException e) {
e.printStackTrace();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
import java.nio.charset.Charset;
import org.apache.commons.io.IOUtils;

import us.codecraft.webmagic.scripts.languages.Javascript;
import us.codecraft.webmagic.scripts.languages.Language;


/**
* @author [email protected]
* @since 0.4.1
*/
public class ScriptProcessorBuilder {

private static final Language DefaultLanguage = Language.JavaScript;
private static final Language DefaultLanguage = new Javascript();

private Language language = DefaultLanguage;

Expand All @@ -39,7 +42,6 @@ public ScriptProcessorBuilder scriptFromFile(String fileName) {
InputStream resourceAsStream = new FileInputStream(fileName);
this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
} catch (IOException e) {
//wrap IOException because I prefer a runtime exception...
throw new IllegalArgumentException(e);
}
return this;
Expand All @@ -50,7 +52,6 @@ public ScriptProcessorBuilder scriptFromClassPathFile(String fileName) {
InputStream resourceAsStream = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
} catch (IOException e) {
//wrap IOException because I prefer a runtime exception...
throw new IllegalArgumentException(e);
}
return this;
Expand Down
Loading