added completely new version for haslach 2025

This commit is contained in:
fotobox
2025-03-17 03:47:13 +01:00
parent 152832515c
commit 769ab91da8
2333 changed files with 409208 additions and 341 deletions

View File

@@ -0,0 +1,5 @@
from ._url import URL, cache_clear, cache_configure, cache_info
# Version string of this package.
__version__ = "1.9.4"
# Public names of the package; all four are imported from ._url above.
__all__ = ("URL", "cache_clear", "cache_configure", "cache_info")

View File

@@ -0,0 +1,121 @@
import sys
from functools import _CacheInfo
from typing import Any, Mapping, Optional, Sequence, Tuple, Type, Union, overload
import multidict
if sys.version_info >= (3, 8):
from typing import Final, TypedDict, final
else:
from typing_extensions import Final, TypedDict, final
# A single scalar that may be used as a query value.
_SimpleQuery = Union[str, int, float]
# One query value: a scalar, or a sequence of scalars.
_QueryVariable = Union[_SimpleQuery, Sequence[_SimpleQuery]]
# Every accepted shape of a "query" argument: None, a plain string,
# a mapping of str keys to values, or a sequence of (key, value) pairs.
_Query = Union[
    None, str, Mapping[str, _QueryVariable], Sequence[Tuple[str, _QueryVariable]]
]
@final
class URL:
    # Type stub for the URL class: declarations only, no behavior.
    # The class is marked @final and every attribute is declared Final.

    # Most components are declared twice, as raw_<name> and <name>.
    # NOTE(review): presumably raw_* is the encoded form and the plain name
    # the decoded one -- confirm against the implementation module.
    scheme: Final[str]
    raw_user: Final[str]
    user: Final[Optional[str]]
    raw_password: Final[Optional[str]]
    password: Final[Optional[str]]
    raw_host: Final[Optional[str]]
    host: Final[Optional[str]]
    port: Final[Optional[int]]
    explicit_port: Final[Optional[int]]
    raw_authority: Final[str]
    authority: Final[str]
    raw_path: Final[str]
    path: Final[str]
    raw_query_string: Final[str]
    query_string: Final[str]
    path_qs: Final[str]
    raw_path_qs: Final[str]
    raw_fragment: Final[str]
    fragment: Final[str]
    query: Final[multidict.MultiDict[str]]
    raw_name: Final[str]
    name: Final[str]
    raw_suffix: Final[str]
    suffix: Final[str]
    raw_suffixes: Final[Tuple[str, ...]]
    suffixes: Final[Tuple[str, ...]]
    raw_parts: Final[Tuple[str, ...]]
    parts: Final[Tuple[str, ...]]
    parent: Final[URL]

    # Construct from a string or another URL; `encoded` is keyword-only.
    def __init__(
        self, val: Union[str, "URL"] = ..., *, encoded: bool = ...
    ) -> None: ...

    # Alternate constructor taking the components as keyword-only arguments.
    @classmethod
    def build(
        cls,
        *,
        scheme: str = ...,
        authority: str = ...,
        user: Optional[str] = ...,
        password: Optional[str] = ...,
        host: str = ...,
        port: Optional[int] = ...,
        path: str = ...,
        query: Optional[_Query] = ...,
        query_string: str = ...,
        fragment: str = ...,
        encoded: bool = ...
    ) -> URL: ...

    # Conversions, comparisons and hashing.
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
    def __bytes__(self) -> bytes: ...
    def __eq__(self, other: Any) -> bool: ...
    def __le__(self, other: Any) -> bool: ...
    def __lt__(self, other: Any) -> bool: ...
    def __ge__(self, other: Any) -> bool: ...
    def __gt__(self, other: Any) -> bool: ...
    def __hash__(self) -> int: ...

    # Operator forms: `url / name` and `url % query` both return a URL.
    def __truediv__(self, name: str) -> URL: ...
    def __mod__(self, query: _Query) -> URL: ...

    def is_absolute(self) -> bool: ...
    def is_default_port(self) -> bool: ...
    def origin(self) -> URL: ...
    def relative(self) -> URL: ...

    # with_*/update_* methods return a URL rather than None.
    def with_scheme(self, scheme: str) -> URL: ...
    def with_user(self, user: Optional[str]) -> URL: ...
    def with_password(self, password: Optional[str]) -> URL: ...
    def with_host(self, host: str) -> URL: ...
    def with_port(self, port: Optional[int]) -> URL: ...
    def with_path(self, path: str, *, encoded: bool = ...) -> URL: ...

    # Query setters accept either one positional `query` or keyword arguments.
    @overload
    def with_query(self, query: _Query) -> URL: ...
    @overload
    def with_query(self, **kwargs: _QueryVariable) -> URL: ...
    @overload
    def update_query(self, query: _Query) -> URL: ...
    @overload
    def update_query(self, **kwargs: _QueryVariable) -> URL: ...

    def with_fragment(self, fragment: Optional[str]) -> URL: ...
    def with_name(self, name: str) -> URL: ...
    def with_suffix(self, suffix: str) -> URL: ...
    def join(self, url: URL) -> URL: ...
    def joinpath(self, *url: str, encoded: bool = ...) -> URL: ...
    def human_repr(self) -> str: ...

    # private API
    @classmethod
    def _normalize_path(cls, path: str) -> str: ...
@final
class cached_property:
    # Type stub for a descriptor: it declares both __get__ and __set__,
    # and both are typed against URL instances only.
    def __init__(self, wrapped: Any) -> None: ...
    def __get__(self, inst: URL, owner: Type[URL]) -> Any: ...
    def __set__(self, inst: URL, value: Any) -> None: ...
class CacheInfo(TypedDict):
    # Shape of the dict returned by cache_info(): one functools cache-info
    # record per key.
    idna_encode: _CacheInfo
    idna_decode: _CacheInfo
# Module-level cache helpers (type stubs only).
def cache_clear() -> None: ...

# Returns a CacheInfo dict with "idna_encode" and "idna_decode" entries.
def cache_info() -> CacheInfo: ...

# Both sizes are keyword-only and may be None.
def cache_configure(
    *, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ...
) -> None: ...

View File

@@ -0,0 +1,18 @@
import os
import sys
__all__ = ("_Quoter", "_Unquoter")


# Any non-empty value of the YARL_NO_EXTENSIONS environment variable
# disables the compiled implementation.
NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS"))  # type: bool
# The compiled implementation is only attempted on CPython.
if sys.implementation.name != "cpython":
    NO_EXTENSIONS = True


# Pick the implementation: the compiled module when allowed and importable,
# otherwise the pure-Python module. Both export the same two names.
if not NO_EXTENSIONS:  # pragma: no branch
    try:
        from ._quoting_c import _Quoter, _Unquoter  # type: ignore[assignment]
    except ImportError:  # pragma: no cover
        # The extension is missing or failed to load: fall back silently.
        from ._quoting_py import _Quoter, _Unquoter  # type: ignore[assignment]
else:
    from ._quoting_py import _Quoter, _Unquoter  # type: ignore[assignment]

View File

@@ -0,0 +1,16 @@
from typing import Optional
class _Quoter:
    # Type stub for the compiled quoter. All constructor options are
    # keyword-only; instances are callable and map Optional[str] to
    # Optional[str].
    def __init__(
        self,
        *,
        safe: str = ...,
        protected: str = ...,
        qs: bool = ...,
        requote: bool = ...
    ) -> None: ...
    def __call__(self, val: Optional[str] = ...) -> Optional[str]: ...
class _Unquoter:
    # Type stub for the compiled unquoter. Constructor options are
    # keyword-only; instances are callable and map Optional[str] to
    # Optional[str].
    def __init__(self, *, unsafe: str = ..., qs: bool = ...) -> None: ...
    def __call__(self, val: Optional[str] = ...) -> Optional[str]: ...

View File

@@ -0,0 +1,371 @@
# cython: language_level=3
from cpython.exc cimport PyErr_NoMemory
from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc
from cpython.unicode cimport PyUnicode_DecodeASCII, PyUnicode_DecodeUTF8Stateful
from libc.stdint cimport uint8_t, uint64_t
from libc.string cimport memcpy, memset
from string import ascii_letters, digits
# Character classes used by the quoter (names follow RFC 3986, section 2).
cdef str GEN_DELIMS = ":/?#[]@"
cdef str SUB_DELIMS_WITHOUT_QS = "!$'()*,"
# The query-string related sub-delimiters are "+", "&", "=" and ";".
# "?" is a gen-delim (already listed in GEN_DELIMS) and must not stand in
# for "&" here; this now matches QS below and the pure-Python module.
cdef str SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + '+&=;'
cdef str RESERVED = GEN_DELIMS + SUB_DELIMS
cdef str UNRESERVED = ascii_letters + digits + '-._~'
# Characters that are never percent-encoded by default.
cdef str ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS
# Characters that are additionally left alone outside query-string mode.
cdef str QS = '+&=;'

# Size of the shared static output buffer, and the growth step used when
# a writer outgrows it.
DEF BUF_SIZE = 8 * 1024  # 8KiB
cdef char BUFFER[BUF_SIZE]
cdef inline Py_UCS4 _to_hex(uint8_t v):
    # Convert a value to its upper-case hexadecimal digit; callers in this
    # file only pass nibbles (a byte shifted right by 4 or masked with 0x0f).
    if v >= 10:
        return <Py_UCS4>(v+0x41-10)  # 10..15 -> 'A'..'F'
    return <Py_UCS4>(v+0x30)  # 0..9 -> '0'..'9'
cdef inline int _from_hex(Py_UCS4 v):
    # Return the numeric value (0..15) of a hexadecimal digit of either
    # case, or -1 when `v` is not a hexadecimal digit.
    cdef int code = <int>(v)

    if 0x30 <= code <= 0x39:  # '0'..'9'
        return code - 0x30
    if 0x41 <= code <= 0x46:  # 'A'..'F'
        return code - 0x41 + 10
    if 0x61 <= code <= 0x66:  # 'a'..'f'
        return code - 0x61 + 10
    return -1
cdef inline int _is_lower_hex(Py_UCS4 v):
    # True only for the lower-case hexadecimal letters 'a'..'f'.
    return v >= 'a' and v <= 'f'
cdef inline Py_UCS4 _restore_ch(Py_UCS4 d1, Py_UCS4 d2):
    # Combine two hexadecimal digits into the byte value they encode.
    # Returns <Py_UCS4>-1 when either character is not a hexadecimal digit;
    # the second digit is only examined when the first one is valid.
    cdef int high = _from_hex(d1)
    cdef int low

    if high >= 0:
        low = _from_hex(d2)
        if low >= 0:
            return <Py_UCS4>(high << 4 | low)
    return <Py_UCS4>-1
# Bitmaps of 16 bytes (128 bits), indexed by code point via bit_at/set_bit.
# ALLOWED_TABLE holds the ALLOWED characters; ALLOWED_NOTQS_TABLE holds
# ALLOWED plus the QS characters (see the initialisation loop below).
cdef uint8_t ALLOWED_TABLE[16]
cdef uint8_t ALLOWED_NOTQS_TABLE[16]
cdef inline bint bit_at(uint8_t array[], uint64_t ch):
    # Test bit number `ch` of the bitmap: byte ch // 8, bit ch % 8.
    cdef uint8_t mask = 1 << (ch & 7)
    return array[ch >> 3] & mask
cdef inline void set_bit(uint8_t array[], uint64_t ch):
    # Set bit number `ch` of the bitmap: byte ch // 8, bit ch % 8.
    cdef uint8_t mask = 1 << (ch & 7)
    array[ch >> 3] = array[ch >> 3] | mask
# Fill both bitmaps once, when the module is imported.
memset(ALLOWED_TABLE, 0, sizeof(ALLOWED_TABLE))
memset(ALLOWED_NOTQS_TABLE, 0, sizeof(ALLOWED_NOTQS_TABLE))

for i in range(128):
    if chr(i) in ALLOWED:
        # ALLOWED characters are set in both tables.
        set_bit(ALLOWED_TABLE, i)
        set_bit(ALLOWED_NOTQS_TABLE, i)
    if chr(i) in QS:
        # QS characters are set only in the table used when qs is false.
        set_bit(ALLOWED_NOTQS_TABLE, i)
# ----------------- writer ---------------------------
# Output buffer state shared by the _write_* helpers.
cdef struct Writer:
    char *buf          # current storage: the static BUFFER or a heap block
    Py_ssize_t size    # capacity of `buf` in bytes
    Py_ssize_t pos     # number of bytes written so far
    bint changed       # OR of the `changed` flags passed to _write_char
cdef inline void _init_writer(Writer* writer):
    # Reset the writer so that it is empty, unchanged, and backed by the
    # static module-level BUFFER.
    writer.changed = 0
    writer.pos = 0
    writer.size = BUF_SIZE
    writer.buf = &BUFFER[0]
cdef inline void _release_writer(Writer* writer):
    # Free the writer's storage, unless it is still the static BUFFER
    # (which was never heap-allocated).
    if writer.buf == BUFFER:
        return
    PyMem_Free(writer.buf)
cdef inline int _write_char(Writer* writer, Py_UCS4 ch, bint changed):
    # Append `ch` (truncated to a C char) to the writer, growing the buffer
    # by BUF_SIZE bytes when it is full.
    # `changed` is OR-ed into writer.changed.
    # Returns 0 on success, or -1 after calling PyErr_NoMemory() when the
    # allocation fails.
    cdef char * buf
    cdef Py_ssize_t size

    if writer.pos == writer.size:
        # reallocate
        size = writer.size + BUF_SIZE
        if writer.buf == BUFFER:
            # First growth: the static BUFFER is not heap memory, so
            # allocate a new block and copy the existing content over.
            buf = <char*>PyMem_Malloc(size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
            memcpy(buf, writer.buf, writer.size)
        else:
            # Already on the heap: grow in place where possible.
            buf = <char*>PyMem_Realloc(writer.buf, size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
        writer.buf = buf
        writer.size = size
    writer.buf[writer.pos] = <char>ch
    writer.pos += 1
    writer.changed |= changed
    return 0
cdef inline int _write_pct(Writer* writer, uint8_t ch, bint changed):
    # Write the byte `ch` as "%XY": a percent sign followed by the high and
    # low nibble as upper-case hexadecimal digits.
    # Returns -1 as soon as one of the three writes fails, 0 otherwise.
    if _write_char(writer, '%', changed) < 0:
        return -1
    if _write_char(writer, _to_hex(<uint8_t>ch >> 4), changed) < 0:
        return -1
    return _write_char(writer, _to_hex(<uint8_t>ch & 0x0f), changed)
cdef inline int _write_utf8(Writer* writer, Py_UCS4 symbol):
    # Percent-encode `symbol` as the bytes of its UTF-8 encoding, always
    # flagging the output as changed.
    # Surrogates (0xD800..0xDFFF) and values above 0x10FFFF write nothing
    # and still return 0. Returns -1 when a write fails.
    cdef uint64_t utf = <uint64_t> symbol

    if utf < 0x80:
        # one byte
        return _write_pct(writer, <uint8_t>utf, True)
    elif utf < 0x800:
        # two bytes
        if _write_pct(writer, <uint8_t>(0xc0 | (utf >> 6)), True) < 0:
            return -1
        return _write_pct(writer,  <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif 0xD800 <= utf <= 0xDFFF:
        # surrogate code point, ignored
        return 0
    elif utf < 0x10000:
        # three bytes
        if _write_pct(writer, <uint8_t>(0xe0 | (utf >> 12)), True) < 0:
            return -1
        if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif utf > 0x10FFFF:
        # symbol is too large
        return 0
    else:
        # four bytes
        if _write_pct(writer,  <uint8_t>(0xf0 | (utf >> 18)), True) < 0:
            return -1
        if _write_pct(writer,  <uint8_t>(0x80 | ((utf >> 12) & 0x3f)),
                      True) < 0:
            return -1
        if _write_pct(writer,  <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
# --------------------- end writer --------------------------
cdef class _Quoter:
    # Callable that percent-encodes a string.
    #
    # Options (all keyword-only):
    #   safe      - extra ASCII characters written literally.
    #   protected - ASCII characters written literally, but re-encoded as
    #               %XX when they arrive percent-encoded.
    #   qs        - query-string mode: a space becomes "+", and the QS
    #               characters are not in the default safe set.
    #   requote   - when true, existing %XX sequences in the input are
    #               decoded and normalised instead of being escaped again.
    cdef bint _qs
    cdef bint _requote

    cdef uint8_t _safe_table[16]
    cdef uint8_t _protected_table[16]

    def __init__(
            self, *, str safe='', str protected='', bint qs=False, bint requote=True,
    ):
        cdef Py_UCS4 ch

        self._qs = qs
        self._requote = requote

        # Start from the module-level default table for the selected mode.
        if not self._qs:
            memcpy(self._safe_table,
                   ALLOWED_NOTQS_TABLE,
                   sizeof(self._safe_table))
        else:
            memcpy(self._safe_table,
                   ALLOWED_TABLE,
                   sizeof(self._safe_table))
        for ch in safe:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            set_bit(self._safe_table, ch)

        # Protected characters are recorded in both tables.
        memset(self._protected_table, 0, sizeof(self._protected_table))
        for ch in protected:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            set_bit(self._safe_table, ch)
            set_bit(self._protected_table, ch)

    def __call__(self, val):
        # Quote `val`. None is returned as None; anything that is neither
        # None nor a str (or str subclass) raises TypeError.
        cdef Writer writer
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        _init_writer(&writer)
        try:
            return self._do_quote(<str>val, &writer)
        finally:
            # Frees the heap buffer if the writer outgrew the static one.
            _release_writer(&writer)

    cdef str _do_quote(self, str val, Writer *writer):
        cdef Py_UCS4 ch
        cdef int changed
        # Py_ssize_t, not int: a C int would truncate the length of very
        # long strings. _Unquoter._do_unquote uses the same types.
        cdef Py_ssize_t idx = 0
        cdef Py_ssize_t length = len(val)

        while idx < length:
            ch = val[idx]
            idx += 1
            if ch == '%' and self._requote and idx <= length - 2:
                ch = _restore_ch(val[idx], val[idx + 1])
                if ch != <Py_UCS4>-1:
                    # A valid %XX sequence: `ch` is now the decoded byte.
                    idx += 2
                    if ch < 128:
                        if bit_at(self._protected_table, ch):
                            # Protected: keep it percent-encoded.
                            if _write_pct(writer, ch, True) < 0:
                                raise
                            continue

                        if bit_at(self._safe_table, ch):
                            # Safe: emit the decoded character itself.
                            if _write_char(writer, ch, True) < 0:
                                raise
                            continue

                    # Keep the sequence encoded; the output only differs
                    # from the input when a hex digit was lower-case.
                    changed = (_is_lower_hex(val[idx - 2]) or
                               _is_lower_hex(val[idx - 1]))
                    if _write_pct(writer, ch, changed) < 0:
                        raise
                    continue
                else:
                    # Not a valid escape: treat the "%" as a literal.
                    ch = '%'

            if self._write(writer, ch) < 0:
                raise

        if not writer.changed:
            # Nothing was altered: return the original object.
            return val
        else:
            return PyUnicode_DecodeASCII(writer.buf, writer.pos, "strict")

    cdef inline int _write(self, Writer *writer, Py_UCS4 ch):
        # Write one input character: "+" for a space in query-string mode,
        # the character itself when it is safe, otherwise its
        # percent-encoded UTF-8 bytes.
        if self._qs:
            if ch == ' ':
                return _write_char(writer, '+', True)

        if ch < 128 and bit_at(self._safe_table, ch):
            return _write_char(writer, ch, False)

        return _write_utf8(writer, ch)
cdef class _Unquoter:
    # Callable that decodes percent-encoded text.
    #
    # Options (keyword-only):
    #   unsafe - characters that must stay (or become) percent-encoded in
    #            the result.
    #   qs     - query-string mode: "+" decodes to a space, and decoded
    #            "+", "=", "&", ";" are re-encoded.
    cdef str _unsafe
    cdef bint _qs
    cdef _Quoter _quoter
    cdef _Quoter _qs_quoter

    def __init__(self, *, unsafe='', qs=False):
        self._unsafe = unsafe
        self._qs = qs
        # Quoters used to re-encode decoded characters that must stay quoted.
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    def __call__(self, val):
        # Unquote `val`. None is returned as None; anything that is neither
        # None nor a str (or str subclass) raises TypeError.
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        return self._do_unquote(<str>val)

    cdef str _do_unquote(self, str val):
        if len(val) == 0:
            return val
        cdef list ret = []
        # Pending bytes of an incomplete UTF-8 sequence (at most 4).
        cdef char buffer[4]
        cdef Py_ssize_t buflen = 0
        cdef Py_ssize_t consumed
        cdef str unquoted
        cdef Py_UCS4 ch = 0
        cdef Py_ssize_t idx = 0
        cdef Py_ssize_t length = len(val)
        cdef Py_ssize_t start_pct

        while idx < length:
            ch = val[idx]
            idx += 1
            if ch == '%' and idx <= length - 2:
                ch = _restore_ch(val[idx], val[idx + 1])
                if ch != <Py_UCS4>-1:
                    idx += 2
                    assert buflen < 4
                    buffer[buflen] = ch
                    buflen += 1
                    try:
                        unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                NULL, &consumed)
                    except UnicodeDecodeError:
                        # The pending bytes are not valid UTF-8: copy their
                        # original %XX text through and retry with only
                        # the newest byte.
                        start_pct = idx - buflen * 3
                        buffer[0] = ch
                        buflen = 1
                        ret.append(val[start_pct : idx - 3])
                        try:
                            unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                    NULL, &consumed)
                        except UnicodeDecodeError:
                            # The newest byte is invalid on its own as well.
                            buflen = 0
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        # Incomplete multi-byte sequence: wait for more bytes.
                        assert consumed == 0
                        continue
                    assert consumed == buflen
                    buflen = 0
                    if self._qs and unquoted in '+=&;':
                        ret.append(self._qs_quoter(unquoted))
                    elif unquoted in self._unsafe:
                        ret.append(self._quoter(unquoted))
                    else:
                        ret.append(unquoted)
                    continue
                else:
                    # Not a valid escape: treat the "%" as a literal.
                    ch = '%'

            if buflen:
                # A literal character interrupted an incomplete sequence:
                # copy the pending %XX text through unchanged.
                start_pct = idx - 1 - buflen * 3
                ret.append(val[start_pct : idx - 1])
                buflen = 0

            if ch == '+':
                if not self._qs or ch in self._unsafe:
                    ret.append('+')
                else:
                    ret.append(' ')
                continue

            if ch in self._unsafe:
                # Zero-pad to two hex digits: a percent-escape is always
                # "%" HEXDIG HEXDIG, so code points below 0x10 need a
                # leading "0" (previously "\n" produced "%A").
                ret.append("%{:02X}".format(ord(ch)))
                continue

            ret.append(ch)

        if buflen:
            # The input ended inside an incomplete sequence.
            ret.append(val[length - buflen * 3 : length])

        return ''.join(ret)

View File

@@ -0,0 +1,197 @@
import codecs
import re
from string import ascii_letters, ascii_lowercase, digits
from typing import Optional, cast
BASCII_LOWERCASE = ascii_lowercase.encode("ascii")
BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)}
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS_WITHOUT_QS = "!$'()*,"
SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + "+&=;"
RESERVED = GEN_DELIMS + SUB_DELIMS
UNRESERVED = ascii_letters + digits + "-._~"
ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS
_IS_HEX = re.compile(b"[A-Z0-9][A-Z0-9]")
_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]")
utf8_decoder = codecs.getincrementaldecoder("utf-8")
class _Quoter:
    """Callable that percent-encodes a string.

    All options are keyword-only:

    * ``safe`` -- extra characters copied through literally.
    * ``protected`` -- characters copied through literally, but kept as
      ``%XX`` when they arrive percent-encoded.
    * ``qs`` -- query-string mode: a space becomes ``+`` and ``+&=;`` are
      not added to the safe set.
    * ``requote`` -- when true, existing ``%XX`` sequences in the input are
      recognised and normalised instead of being escaped again.
    """

    def __init__(
        self,
        *,
        safe: str = "",
        protected: str = "",
        qs: bool = False,
        requote: bool = True,
    ) -> None:
        self._safe = safe
        self._protected = protected
        self._qs = qs
        self._requote = requote

    def __call__(self, val: Optional[str]) -> Optional[str]:
        """Return *val* percent-encoded.

        ``None`` is returned as ``None`` and an empty string as ``""``.
        Raises ``TypeError`` for any other non-str argument. When quoting
        changes nothing, the original string object is returned.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        # Work on UTF-8 bytes; code points that cannot be encoded are dropped.
        bval = cast(str, val).encode("utf8", errors="ignore")
        ret = bytearray()
        # Bytes of the percent-escape currently being read ("%" + 2 digits).
        pct = bytearray()
        safe = self._safe
        safe += ALLOWED
        if not self._qs:
            safe += "+&=;"
        safe += self._protected
        # Raises UnicodeEncodeError if safe/protected hold non-ASCII text.
        bsafe = safe.encode("ascii")
        idx = 0
        while idx < len(bval):
            ch = bval[idx]
            idx += 1

            if pct:
                # Inside an escape: collect digits, upper-casing letters.
                if ch in BASCII_LOWERCASE:
                    ch = ch - 32  # convert to uppercase
                pct.append(ch)
                if len(pct) == 3:  # pragma: no branch   # peephole optimizer
                    buf = pct[1:]
                    if not _IS_HEX.match(buf):
                        # Not an escape: emit an encoded "%" and rescan the
                        # two bytes that followed it.
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue
                    try:
                        unquoted = chr(int(pct[1:].decode("ascii"), base=16))
                    except ValueError:
                        # Matched the pattern but is not valid hexadecimal.
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue

                    if unquoted in self._protected:
                        # Protected characters stay percent-encoded.
                        ret.extend(pct)
                    elif unquoted in safe:
                        # Safe characters are emitted decoded.
                        ret.append(ord(unquoted))
                    else:
                        ret.extend(pct)
                    pct.clear()

                # special case, if we have only one char after "%"
                elif len(pct) == 2 and idx == len(bval):
                    ret.extend(b"%25")
                    pct.clear()
                    idx -= 1

                continue

            elif ch == ord("%") and self._requote:
                # Start of a possible escape.
                pct.clear()
                pct.append(ch)

                # special case if "%" is last char
                if idx == len(bval):
                    ret.extend(b"%25")

                continue

            if self._qs:
                if ch == ord(" "):
                    ret.append(ord("+"))
                    continue
            if ch in bsafe:
                ret.append(ch)
                continue

            # Everything else is percent-encoded with upper-case digits.
            ret.extend((f"%{ch:02X}").encode("ascii"))

        ret2 = ret.decode("ascii")
        if ret2 == val:
            # Unchanged: hand back the caller's own object.
            return val
        return ret2
class _Unquoter:
    """Callable that decodes percent-encoded text.

    Options are keyword-only:

    * ``unsafe`` -- characters that must stay (or become) percent-encoded
      in the result.
    * ``qs`` -- query-string mode: ``+`` decodes to a space, and decoded
      ``+``, ``=``, ``&``, ``;`` are re-encoded.
    """

    def __init__(self, *, unsafe: str = "", qs: bool = False) -> None:
        self._unsafe = unsafe
        self._qs = qs
        # Quoters used to re-encode decoded characters that must stay quoted.
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    def __call__(self, val: Optional[str]) -> Optional[str]:
        """Return *val* with percent-escapes decoded as UTF-8.

        ``None`` is returned as ``None`` and an empty string as ``""``.
        Raises ``TypeError`` for any other non-str argument. Escapes that
        do not form valid UTF-8 are copied through unchanged. When nothing
        changes, the original string object is returned.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        decoder = cast(codecs.BufferedIncrementalDecoder, utf8_decoder())
        ret = []
        idx = 0
        while idx < len(val):
            ch = val[idx]
            idx += 1
            if ch == "%" and idx <= len(val) - 2:
                pct = val[idx : idx + 2]
                if _IS_HEX_STR.fullmatch(pct):
                    b = bytes([int(pct, base=16)])
                    idx += 2
                    try:
                        unquoted = decoder.decode(b)
                    except UnicodeDecodeError:
                        # The pending bytes are not valid UTF-8: copy their
                        # original %XX text through and retry with only
                        # the newest byte.
                        start_pct = idx - 3 - len(decoder.buffer) * 3
                        ret.append(val[start_pct : idx - 3])
                        decoder.reset()
                        try:
                            unquoted = decoder.decode(b)
                        except UnicodeDecodeError:
                            # The newest byte is invalid on its own as well.
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        # Incomplete multi-byte sequence: wait for more bytes.
                        continue
                    if self._qs and unquoted in "+=&;":
                        to_add = self._qs_quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    elif unquoted in self._unsafe:
                        to_add = self._quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    else:
                        ret.append(unquoted)
                    continue

            if decoder.buffer:
                # A literal character interrupted an incomplete sequence:
                # copy the pending %XX text through unchanged.
                start_pct = idx - 1 - len(decoder.buffer) * 3
                ret.append(val[start_pct : idx - 1])
                decoder.reset()

            if ch == "+":
                if not self._qs or ch in self._unsafe:
                    ret.append("+")
                else:
                    ret.append(" ")
                continue

            if ch in self._unsafe:
                # Zero-pad to two hex digits: a percent-escape is always
                # "%" HEXDIG HEXDIG, so code points below 0x10 need a
                # leading "0" (previously "\n" produced "%A").
                ret.append(f"%{ord(ch):02X}")
                continue

            ret.append(ch)

        if decoder.buffer:
            # The input ended inside an incomplete sequence.
            ret.append(val[-len(decoder.buffer) * 3 :])

        ret2 = "".join(ret)
        if ret2 == val:
            # Unchanged: hand back the caller's own object.
            return val
        return ret2

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
# Placeholder