mirror of
https://github.com/zongzibinbin/MallChat.git
synced 2026-03-13 21:53:41 +08:00
fix:url解析图片时进行链接有效性校验
This commit is contained in:
@@ -11,6 +11,9 @@ import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.springframework.data.util.Pair;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -59,7 +62,7 @@ public abstract class AbstractUrlDiscover implements UrlDiscover {
|
||||
return UrlInfo.builder()
|
||||
.title(getTitle(document))
|
||||
.description(getDescription(document))
|
||||
.image(getImage(assemble(url),document)).build();
|
||||
.image(getImage(assemble(url), document)).build();
|
||||
}
|
||||
|
||||
|
||||
@@ -83,4 +86,32 @@ public abstract class AbstractUrlDiscover implements UrlDiscover {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断链接是否有效
|
||||
* 输入链接
|
||||
* 返回true或者false
|
||||
*/
|
||||
public static boolean isConnect(String href) {
|
||||
//请求地址
|
||||
URL url;
|
||||
//请求状态码
|
||||
int state;
|
||||
//下载链接类型
|
||||
String fileType;
|
||||
try {
|
||||
url = new URL(href);
|
||||
HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
|
||||
state = httpURLConnection.getResponseCode();
|
||||
fileType = httpURLConnection.getHeaderField("Content-Disposition");
|
||||
//如果成功200,缓存304,移动302都算有效链接,并且不是下载链接
|
||||
if ((state == 200 || state == 302 || state == 304) && fileType == null) {
|
||||
return true;
|
||||
}
|
||||
httpURLConnection.disconnect();
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -7,6 +7,10 @@ import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* @author zhaoqichao
|
||||
* @date 2023/7/3 16:54
|
||||
@@ -34,19 +38,17 @@ public class CommonUrlDiscover extends AbstractUrlDiscover {
|
||||
String image = document.select("link[type=image/x-icon]").attr("href");
|
||||
//如果没有去匹配含有icon属性的logo
|
||||
String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image;
|
||||
//如果icon中已经包含了url部分域名
|
||||
if (StrUtil.isNotBlank(StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico")) &&
|
||||
StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico"))) {
|
||||
return "http://" + StrUtil.removePrefix(href, "/");
|
||||
}
|
||||
//如果url已经包含了logo
|
||||
if (StrUtil.containsAny(url, "favicon")) {
|
||||
return url;
|
||||
}
|
||||
//如果logo中有url
|
||||
if (StrUtil.containsAny(href, "http") || StrUtil.containsAny(href, "https")) {
|
||||
//如果icon可以直接访问或者包含了http
|
||||
if (isConnect(!StrUtil.startWith(href, "http") ? "http:" + href : href)) {
|
||||
return href;
|
||||
}
|
||||
|
||||
return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/"));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ public class WxUrlDiscover extends AbstractUrlDiscover {
|
||||
@Nullable
|
||||
@Override
|
||||
public String getImage(String url, Document document) {
|
||||
return document.getElementsByAttributeValue("property", "og:image").attr("content");
|
||||
String href = document.getElementsByAttributeValue("property", "og:image").attr("content");
|
||||
return isConnect(href) ? href: null;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user