当前位置：首页 > Web开发 > 正文

加一个"?" re.findall("h.*?\."

2024-03-31 Web开发

import requests
import re

url = ''  # 原文此处的网址在转载时丢失，请填入要抓取的页面地址
# url = ''
r = requests.get(url)
r.encoding = 'utf-8'
data = re.findall('<title>(.*?)</title>', r.text, re.S)
print(data)

['京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物！']

['东方工业网：财经门户，提供专业的财经、股票、行情、证券、基金、理财、银行、保险、信托、期货、黄金、股吧、博客等各种财经资讯及数据']

import re

# 提取python

key = "javapythonc++php"

re.findall("python", key)[0]

"""

python

"""

# 提取出hello world

key = "<html><h1>hello world</h1></html>"

re.findall('<h1>hello world</h1>', key)

"""

['<h1>hello world</h1>']

"""

# 提取170

string = "我喜欢身高为170的女孩"

# re.findall("170", string)[0]

re.findall(r'\d+', string)

"""

['170']

"""

# 提取出http和https

key = 'http://www.baidu.com and https://boob.com'

re.findall('https{0,1}', key) # {}前的字符出现0次或1次

"""

['http', 'https']

"""

# 提取出hit.

key = "bobo@hit.edu.com"

re.findall(r"h.*\.", key) # .表示任意字符(\n除外)；*表示匹配0个或多个；\表示对.转义

"""

['hit.edu.']

"""

# 贪婪模式：按照正则表达式尽可能多地提取数据。

# 切换为非贪婪模式,加一个"?"

re.findall(r"h.*?\.", key)

"""

['hit.']

"""

# 匹配sas和saas

key = "saas and sas and saaas"

re.findall('sa{1,2}s', key) # 匹配1-2次由前面表达式定义的片段

"""

['saas', 'sas']

"""

# 匹配i开头的行 re.S:基于单行匹配 re.M:基于多行匹配

string = '''fall in love with you
i love you very much
i love she
i love her'''

re.findall("^i.*", string, re.M)

"""

['i love you very much', 'i love she', 'i love her']

"""

# 匹配所有的行

string = """<div>静夜思
床前明月光
疑是地上霜
举头望明月
低头思故乡
</div>"""

re.findall('<div>.*</div>', string, re.S)

"""

['<div>静夜思\n床前明月光\n疑是地上霜\n举头望明月\n低头思故乡\n</div>']

"""

re 正则表达式爬取网站标题

，

温馨提示: 本文由Jm博客推荐，转载请保留链接: https://www.jmwww.net/file/web/29937.html

上一篇：提高网页的浏览率
下一篇： 3、问题解决：最简单的办法就是直接修改搭建项目的端口号

加一个"?" re.findall("h.*?\."

推荐文章

热门文章

标签

友情链接

关于本站

联系我们

特别鸣谢