Coverage for adhoc-cicd-odoo-odoo / odoo / tools / urls.py: 70%

22 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-09 18:22 +0000

1import re 

2import urllib.parse 

3 

4__all__ = ['urljoin'] 

5 

6 

7def _contains_dot_segments(path: str) -> str: 

8 # most servers decode url before doing dot segment resolutions 

9 decoded_path = urllib.parse.unquote(path, errors='strict') 

10 return any(seg in ('.', '..') for seg in decoded_path.split('/')) 

11 

12 

def urljoin(base: str, extra: str) -> str:
    """Join a trusted base URL with a relative URL safely.

    Unlike standard URL joins that follow RFC 3986 (e.g., `urllib.parse.urljoin`),
    this function enforces strict behavior that better aligns with developer
    expectations and guards against path traversals, unplanned redirects, and
    accidental host/scheme overrides.

    - Behaves similarly to `base + '/' + extra`
    - Always keeps scheme and netloc from `base`
    - Accepts a scheme/host in `extra` only if they are identical to those of
      `base` and the path of `extra` is located under the path of `base`;
      raises an error otherwise
    - Forbids `.` and `..` path traversal (also when percent-encoded)
    - Appends the path of `extra` to the path of `base`, collapsing repeated `/`
    - Takes query and fragment from `extra`; those of `base` are discarded

    :param base: Trusted base URL or path.
    :type base: str
    :param extra: Relative URL (`path`, `?query`, `#frag`). No scheme & host allowed unless it matches `base`
    :type extra: str
    :returns: joined URL.
    :rtype: str
    :raises AssertionError: If inputs are not strings (the checks are plain
        ``assert`` statements, so they are skipped under ``python -O``).
    :raises ValueError: If the resulting path contains dot-segments, or if
        `extra` is an absolute URL that does not match `base`.

    Examples::

        >>> urljoin('https://api.example.com/v1/?bar=fiz', '/users/42?bar=bob')
        'https://api.example.com/v1/users/42?bar=bob'

        >>> urljoin('https://example.com/foo', 'http://8.8.8.8/foo')
        Traceback (most recent call last):
            ...
        ValueError: Extra URL must use same scheme and host as base, and begin with base path

        >>> urljoin('https://api.example.com/data/', '/?lang=fr')
        'https://api.example.com/data/?lang=fr'
    """
    assert isinstance(base, str), "Base URL must be a string"
    assert isinstance(extra, str), "Extra URL must be a string"

    # query and fragment of the base are intentionally dropped
    b_scheme, b_netloc, path, _, _ = urllib.parse.urlsplit(base)
    e_scheme, e_netloc, e_path, e_query, e_fragment = urllib.parse.urlsplit(extra)

    if e_scheme or e_netloc:
        # allow absolute extra URL if it matches base
        same_origin = e_scheme == b_scheme and e_netloc == b_netloc
        under_base = e_path.startswith(path)
        remainder = e_path[len(path):]
        # The prefix match must stop on a segment boundary, otherwise base
        # "/api/v1" would accept "/api/v10/users" and yield "/api/v1/0/users".
        # A backslash counts as a boundary because it is stripped as a leading
        # separator below.
        on_boundary = (
            not remainder
            or not path
            or path.endswith('/')
            or remainder.startswith(('/', '\\'))
        )
        if not (same_origin and under_base and on_boundary):
            raise ValueError("Extra URL must use same scheme and host as base, and begin with base path")

        e_path = remainder

    if e_path:
        # prevent urljoin("/", "\\example.com/") to resolve as absolute to "//example.com/" in a browser redirect
        # https://github.com/mozilla-firefox/firefox/blob/5e81b64f4ed88b610eb332e103744d68ee8b6c0d/netwerk/base/nsStandardURL.cpp#L2386-L2388
        e_path = e_path.lstrip('/\\\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f ')
        path = f'{path}/{e_path}'

    # normalize: foo//bar -> foo/bar
    path = re.sub(r'/+', '/', path)

    # checked on the joined path, so dot segments in the base are rejected too
    if _contains_dot_segments(path):
        raise ValueError("Dot segments are not allowed")

    return urllib.parse.urlunsplit((b_scheme, b_netloc, path, e_query, e_fragment))