diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java deleted file mode 100644 index b2c5d70..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.abin.mallchat.common.common.utils.chain; - -import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; -import org.jetbrains.annotations.Nullable; -import org.jsoup.nodes.Document; - -import java.util.Map; - -/** - * Description: 测试 - * Author: achao - * Date: 2023/7/6 9:29 - */ -public class Application { - public static void main(String[] args) { - PrioritizedUrlHandler handler = new PrioritizedUrlHandler(); - String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg "; - - Map urlContentMap = handler.getUrlContentMap(longStr); - System.out.println(urlContentMap); - } -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java deleted file mode 100644 index 5d85f00..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java +++ /dev/null @@ -1,51 +0,0 @@ -package com.abin.mallchat.common.common.utils.chain; - -import cn.hutool.core.util.StrUtil; -import org.jetbrains.annotations.Nullable; -import org.jsoup.nodes.Document; - -/** - * Description: - * Author: achao - * Date: 2023/7/6 9:25 - */ -public class CommonUrlHandler extends FactoryUrlHandler { - - @Nullable - @Override - public String getTitle(Document document) { - return document.title(); - } - - @Nullable - @Override - public String getDescription(Document document) { - String description = document.head().select("meta[name=description]").attr("content"); - String keywords = document.head().select("meta[name=keywords]").attr("content"); - String content = StrUtil.isNotBlank(description) ? description : keywords; - //只保留一句话的描述 - return StrUtil.isNotBlank(content) ? content.substring(0, content.indexOf("。")) : content; - } - - @Nullable - @Override - public String getImage(String url, Document document) { - String image = document.select("link[type=image/x-icon]").attr("href"); - //如果没有去匹配含有icon属性的logo - String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image; - //如果icon中已经包含了url部分域名 - if (StrUtil.isNotBlank(StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico")) && - StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico"))) { - return "http://" + StrUtil.removePrefix(href, "/"); - } - //如果url已经包含了logo - if (StrUtil.containsAny(url, "favicon")) { - return url; - } - //如果logo中有url - if (StrUtil.containsAny(href, "http") || StrUtil.containsAny(href, "https")) { - return href; - } - return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/")); - } -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java deleted file mode 100644 index bd8d0e8..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.abin.mallchat.common.common.utils.chain; - -import cn.hutool.core.util.ReUtil; -import cn.hutool.core.util.StrUtil; -import com.abin.mallchat.common.common.utils.FutureUtils; -import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; -import lombok.extern.slf4j.Slf4j; -import org.jsoup.Connection; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.springframework.data.util.Pair; - -import javax.annotation.Nullable; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.CompletableFuture; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -/** - * Description: 链接处理工厂 - * Author: achao - * Date: 2023/7/6 9:12 - */ -@Slf4j -public abstract class FactoryUrlHandler extends UrlHandler{ - - //链接识别的正则 - private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?"); - - @Override - @Nullable - public Map getUrlContentMap(String content) { - - if (StrUtil.isBlank(content)) { - return new HashMap<>(); - } - List matchList = ReUtil.findAll(PATTERN, content, 0); - - //并行请求 - List>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> { - UrlInfo urlInfo = getContent(match); - return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo); - })).collect(Collectors.toList()); - CompletableFuture>> future = FutureUtils.sequenceNonNull(futures); - //结果组装 - return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a)); - } - - private UrlInfo getContent(String url){ - url = !StrUtil.startWith(url, "http") ? "http://" + url : url; - Document document = getUrlDocument(url); - return UrlInfo.builder() - .title(getTitle(document)) - .description(getDescription(document)) - .image(getImage(url,document)).build(); - } - - protected Document getUrlDocument(String matchUrl) { - try { - Connection connect = Jsoup.connect(matchUrl); - connect.timeout(2000); - return connect.get(); - } catch (Exception e) { - log.error("find error:url:{}", matchUrl, e); - } - return null; - } - - /** - * 获取链接的标题 - * @param document - * @return - */ - @Nullable - abstract String getTitle(Document document); - - /** - * 获取链接的描述 - * @param document - * @return - */ - @Nullable - abstract String getDescription(Document document); - - /** - * 获取链接的LOGO - * @param document - * @return - */ - @Nullable - abstract String getImage(String url, Document document); - -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java deleted file mode 100644 index b06ad32..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.abin.mallchat.common.common.utils.chain; - -import cn.hutool.core.util.StrUtil; -import com.abin.mallchat.common.common.utils.discover.UrlDiscover; -import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; -import org.jetbrains.annotations.Nullable; -import org.jsoup.nodes.Document; - -import java.util.List; -import java.util.Map; - -/** - * Description: 优先级链接统一处理扩展类 - * Author: achao - * Date: 2023/7/6 9:36 - */ -public class PrioritizedUrlHandler extends FactoryUrlHandler { - - private final FactoryUrlHandler commonUrlHandler = new CommonUrlHandler(); - private final FactoryUrlHandler wxUrlHandler = new WxUrlHandler(); - - @Nullable - @Override - String getTitle(Document document) { - return StrUtil.isBlank(wxUrlHandler.getTitle(document)) ? commonUrlHandler.getTitle(document) : wxUrlHandler.getTitle(document); - } - - @Nullable - @Override - String getDescription(Document document) { - return StrUtil.isBlank(wxUrlHandler.getDescription(document)) ? commonUrlHandler.getDescription(document) : wxUrlHandler.getDescription(document); - } - - @Nullable - @Override - String getImage(String url, Document document) { - return StrUtil.isBlank(wxUrlHandler.getImage(url, document)) ? commonUrlHandler.getImage(url, document) : wxUrlHandler.getImage(url, document); - } -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java deleted file mode 100644 index c9a9a3d..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.abin.mallchat.common.common.utils.chain; - -import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; -import org.jsoup.nodes.Document; - -import javax.annotation.Nullable; -import java.util.Map; -import java.util.regex.Pattern; - -/** - * Description: url集合处理抽象接口定义类 - * Author: achao - * Date: 2023/7/6 8:58 - */ -public abstract class UrlHandler { - - /** - * 提取消息中的所有链接,并组装Map - * @param content - * @return - */ - @Nullable - abstract Map getUrlContentMap(String content); - -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java deleted file mode 100644 index f8356d9..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.abin.mallchat.common.common.utils.chain; - -import cn.hutool.core.util.StrUtil; -import org.jetbrains.annotations.Nullable; -import org.jsoup.nodes.Document; - -/** - * Description: - * Author: achao - * Date: 2023/7/6 9:34 - */ -public class WxUrlHandler extends FactoryUrlHandler { - - @Nullable - @Override - public String getTitle(Document document) { - return document.getElementsByAttributeValue("property", "og:title").attr("content"); - } - - @Nullable - @Override - public String getDescription(Document document) { - String description = document.getElementsByAttributeValue("property", "og:description").attr("content"); - return StrUtil.isNotBlank(description) ? description.substring(0, description.indexOf("。")) : description; - } - - @Nullable - @Override - public String getImage(String url, Document document) { - return document.getElementsByAttributeValue("property", "og:image").attr("content"); - } -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java deleted file mode 100644 index ad1050a..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.abin.mallchat.common.common.utils.chain.dto; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Description: 链接信息提取类 - * Author: achao - * Date: 2023/7/6 8:54 - */ -@Data -@Builder -@AllArgsConstructor -@NoArgsConstructor -public class UrlInfo { - /** - * 标题 - **/ - String title; - - /** - * 描述 - **/ - String description; - - /** - * 网站LOGO - **/ - String image; -}