Skip to content

Commit

Permalink
Merge pull request #117 from CybercentreCanada/url_size
Browse files (browse the repository at this point in the history)
Remove length limits in URL_RE
  • Loading branch information
cccs-jh authored Mar 3, 2025
2 parents bb7879f + 467de89 commit f5da338
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/multidecoder/decoders/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@
# URL_RE is a bytes regex pattern for ftp/http/https URLs, built by concatenating
# adjacent literals: scheme, userinfo, host, port, then path/query/fragment.
# NOTE(review): this text is a rendered diff without +/- markers, so the userinfo
# and path literals each appear twice: first the form bounded by {,2000}, then
# the unbounded form using *. The commit title ("Remove length limits in URL_RE")
# suggests only the unbounded lines exist in the committed file; confirm against
# the repository before copying this block.
# #-& is #-/ but stopped before '
# (the range #-& covers # $ % & and therefore leaves out ' ( ) * + , - . /)
URL_RE = (
rb"(?i)(?:ftp|https?)://" # scheme
# Optional userinfo ending in '@'; this variant caps the run at 2000 characters.
rb"(?:[\w!$-.:;=~@]{,2000}@)?" # userinfo
# Optional userinfo ending in '@'; this variant has no length cap.
rb"(?:[\w!$-.:;=~@]*@)?" # userinfo
# Either 4-253 characters of [%A-Z0-9.-] not starting with %5B, or a bracketed
# literal of 3-117 characters of [%0-9A-F:] where each bracket may be written
# literally or percent-encoded (%5B / %5D). The bracketed form is presumably an
# IPv6 address literal; the pattern itself only checks the character set.
rb"(?:(?!%5B)[%A-Z0-9.-]{4,253}|(?:\[|%5B)[%0-9A-F:]{3,117}(?:\]|%5D))" # host
# Optional ':' followed by up to five digits, the first limited to 0-6 when all
# five are present; a bare ':' with no digits also matches.
rb"(?::[0-6]?[0-9]{0,4})?" # port
# Optional tail introduced by '/', '?' or '#'; this variant caps the greedy run
# at 2000 characters before the required final character.
rb"(?:[/?#](?:[\w!#-/:;=@?~]{,2000}[\w!#-&(*+\-/:=@?~])?)?" # path, query and fragment
# Same tail with no length cap on the greedy run.
rb"(?:[/?#](?:[\w!#-/:;=@?~]*[\w!#-&(*+\-/:=@?~])?)?" # path, query and fragment
# The final char class stops urls from ending in ' ) , . or ;
# to prevent trailing characters from being included in the url.
)
Expand Down
2 changes: 2 additions & 0 deletions tests/test_decoders/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ def test_email_re():
b"http://%5B::1]", # The colons used to have to be percent encoded in edge and chrome, but not anymore.
b"http://[::1%5D", # You wouldn't think this would work, but it still does on Chrome and Edge.
b"http://[::1%5D/path", # Even handles the rest of the url just fine.
# Large URLs
b"http://youtube.com" + (b"%20" * 2000) + b"@google.com",
],
)
def test_URL_RE_matches(url):
Expand Down

0 comments on commit f5da338

Please sign in to comment.