Java正则表达式获得html字符串中img标签的src中的url地址

参考:https://www.cnblogs.com/gmq-sh/p/5820937.html

// 只要src的url值
	/**
	 * Demonstrates regex-based extraction: prints the {@code src} URL of every
	 * {@code <img>} tag found in a sample HTML string, one URL per line.
	 */
	@Test
	public void getImgStr() {
		String htmlStr = "<img changedsize=\"true\" class=\"BDE_Image\" height=\"400\" src=\"https://imgsa.baidu.com/forum/w%3D580/sign=b23191a36b63f6241c5d390bb745eb32/8c361530e924b899e92f63a26e061d950a7bf623.jpg\">";
		extractImgSrcs(htmlStr).forEach(System.out::println);
	}

	/**
	 * Matches an {@code <img>} tag up to and including its {@code src} attribute.
	 * Exactly one capture group participates in a match: group 1 for a
	 * double-quoted value, group 2 for a single-quoted value, group 3 for an
	 * unquoted value. {@code [^>]} also matches line breaks, so a tag may span
	 * several lines. The {@code \s} required before {@code src} keeps attributes
	 * such as {@code data-src} from being mistaken for {@code src}.
	 */
	private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
			"<img\\b[^>]*?\\ssrc\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s>]+))",
			Pattern.CASE_INSENSITIVE);

	/**
	 * Collects the distinct {@code src} values of all {@code <img>} tags in the
	 * given HTML. Tag and attribute names are matched case-insensitively.
	 *
	 * <p>This is a regex scan, not an HTML parser: a {@code >} inside an
	 * attribute value that precedes {@code src} ends the tag early for the
	 * purposes of this method.
	 *
	 * @param html the HTML text to scan; {@code null} or empty yields an empty set
	 * @return a mutable set of the non-empty {@code src} values, without
	 *         surrounding quotes; never {@code null}
	 */
	private static Set<String> extractImgSrcs(String html) {
		Set<String> urls = new HashSet<>();
		if (html == null || html.isEmpty()) {
			return urls;
		}
		Matcher matcher = IMG_SRC_PATTERN.matcher(html);
		while (matcher.find()) {
			// Only the group for the quoting style actually used is non-null.
			String url = matcher.group(1);
			if (url == null) {
				url = matcher.group(2);
			}
			if (url == null) {
				url = matcher.group(3);
			}
			if (!url.isEmpty()) {
				urls.add(url);
			}
		}
		return urls;
	}
/**
 * Demonstrates extraction without regular expressions for matching the tag:
 * splits a sample HTML string on whitespace and prints the value of every
 * token that is a {@code src=} attribute.
 */
@Test
	public void getImgStr1() {
		String htmlStr = "<img changedsize=\"true\" class=\"BDE_Image\" height=\"400\" src=\"https://imgsa.baidu.com/forum/w%3D580/sign=b23191a36b63f6241c5d390bb745eb32/8c361530e924b899e92f63a26e061d950a7bf623.jpg\">";
		for (String token : htmlStr.split("\\s+")) {
			String src = srcValueOf(token);
			if (src != null) {
				System.out.println(src);
			}
		}
	}

	/**
	 * Returns the attribute value carried by a single whitespace-delimited token
	 * of the form {@code src="..."}, {@code src='...'} or {@code src=...},
	 * ignoring anything after the closing quote (or, for an unquoted value,
	 * from the first {@code >} onwards). The attribute name is compared
	 * case-insensitively.
	 *
	 * <p>Limitations of the token-based approach: a value containing whitespace,
	 * or whitespace around {@code =}, splits the attribute across tokens and is
	 * not recognised; the token is not checked to belong to an {@code <img>} tag.
	 *
	 * @param token one whitespace-delimited piece of the HTML text
	 * @return the value without quotes, or {@code null} when the token is not a
	 *         complete, non-empty {@code src} attribute
	 */
	private static String srcValueOf(String token) {
		final String prefix = "src=";
		if (!token.regionMatches(true, 0, prefix, 0, prefix.length())) {
			return null;
		}
		String rest = token.substring(prefix.length());
		if (rest.isEmpty()) {
			return null;
		}
		char first = rest.charAt(0);
		if (first == '"' || first == '\'') {
			// Quoted value: take everything up to the matching closing quote,
			// wherever it sits, instead of assuming a fixed-length tail.
			int close = rest.indexOf(first, 1);
			if (close <= 1) {
				return null; // no closing quote in this token, or empty value
			}
			return rest.substring(1, close);
		}
		// Unquoted value: it runs until the end of the tag, if that is in this token.
		int tagEnd = rest.indexOf('>');
		String value = tagEnd < 0 ? rest : rest.substring(0, tagEnd);
		return value.isEmpty() ? null : value;
	}

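测试结果: 两个方法都会在控制台输出同一个地址:https://imgsa.baidu.com/forum/w%3D580/sign=b23191a36b63f6241c5d390bb745eb32/8c361530e924b899e92f63a26e061d950a7bf623.jpg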

********************************* 不积跬步无以至千里,不积小流无以成江海 ********************************* 

猜你喜欢

转载自blog.csdn.net/weixin_42465125/article/details/88551967