Skip to content

Sessionhandling

sessionhandling

Module with session handling functions

Session handling functions include: - start a webdriver session with a new driver, including logging - connect a webdriver session to a running browser instance via its debug port (Chrome only) - browser setting bundling functions for a) direct download b) disabling notifications c) optimized scraping

Currently full support is implemented for Chrome and Firefox only.

__all__ = ['initWebDriver', 'init_webdriver', 'directdownload', 'disablenotifications', 'connectChrome', 'connect_chrome', 'checkDebugport', 'check_debugport'] module-attribute

optimizedscraping_default = False module-attribute

stealthmode_default = False module-attribute

supported_browsers = {'chrome': ('chrome.exe', 'chromedriver.exe'), 'firefox': ('firefox.exe', 'geckodriver.exe')} module-attribute

checkDebugport(debugport: int) -> bool

checkDebugport - check port if activated as debugport

Parameters:

Name Type Description Default
debugport int

port number

required

Returns:

Name Type Description
bool bool

flag if port connects to a browser

Source code in src/utils_seleniumxp/sessionhandling.py
def checkDebugport(debugport: int) -> bool:
    """
    checkDebugport - camelCase alias of check_debugport

    Args:
        debugport (int): port number to probe

    Returns:
        bool: whatever check_debugport reports for this port
    """
    is_debugport: bool = check_debugport(debugport)
    return is_debugport

check_configpath(config: configparser.ConfigParser, inisection: str, configparampath: str, binaryfile: str = '', checkwd: bool = False) -> Optional[str]

check_configpath - check path for binaries

Source code in src/utils_seleniumxp/sessionhandling.py
def check_configpath(
    config: configparser.ConfigParser,
    inisection: str,
    configparampath: str,
    binaryfile: str = "",
    checkwd: bool = False
) -> Optional[str]:
    """
    check_configpath - check path for binaries

    Reads a directory from the config and verifies it. If a binary file name
    is given, the directory only qualifies when it contains that file; with
    checkwd set, the current working directory is tried as a fallback.

    Args:
        config (configparser.ConfigParser): parsed configuration
        inisection (str): section to read the path option from
        configparampath (str): name of the option holding the directory
        binaryfile (str, optional): file that must exist in the directory. Defaults to "".
        checkwd (bool, optional): also look for binaryfile in the current working directory. Defaults to False.

    Returns:
        Optional[str]: the directory that passed the check, "" if none did
    """

    checkpath: str = config.get(inisection, configparampath, fallback="")

    if checkpath:
        # environment variables such as %USERPROFILE% / $HOME may be used in the config
        checkpath = os.path.expandvars(checkpath)
        if os.path.isdir(checkpath):
            if not binaryfile or os.path.isfile(os.path.join(checkpath, binaryfile)):
                return checkpath

    # The working directory fallback must not depend on a path being configured:
    # a missing/empty option is exactly the case in which the fallback is needed.
    if binaryfile and checkwd:
        workdir = os.getcwd()
        if os.path.isfile(os.path.join(workdir, binaryfile)):
            return workdir

    return ""

check_debugport(debugport: int) -> bool

check_debugport - check port if activated as debugport

Parameters:

Name Type Description Default
debugport int

port number

required

Returns:

Name Type Description
bool bool

flag if port connects to a browser

Source code in src/utils_seleniumxp/sessionhandling.py
def check_debugport(debugport: int) -> bool:
    """
    check_debugport - check port if activated as debugport

    Sends an HTTP GET to http://localhost:<debugport> and treats a
    "200 OK" answer as proof that a browser is listening on that port.

    Args:
        debugport (int): port number

    Returns:
        bool: flag if port connects to a browser; False on any request
        failure, including no answer within 5 seconds
    """

    try:
        # requests has no default timeout; without one a port that accepts the
        # connection but never answers would block the caller indefinitely.
        response = requests.get(f"http://localhost:{debugport}", timeout=5)
    except Exception:
        # deliberate best effort: any failure means "no usable debug port"
        return False

    return response.status_code == 200 and response.reason == "OK"

check_usrpath(config: configparser.ConfigParser, inisection: str, configparampathsuffix: str) -> Optional[str]

check_usrpath - check the path built from the user's home directory and the path suffix from the config file

Source code in src/utils_seleniumxp/sessionhandling.py
def check_usrpath(config: configparser.ConfigParser, inisection: str, configparampathsuffix: str) -> Optional[str]:
    """
    check_usrpath - check path for user path suffix from config file

    The suffix configured under inisection/configparampathsuffix is expanded
    for environment variables and joined onto the user's home directory.

    Returns:
        Optional[str]: the joined path if it is an existing directory,
        otherwise "" (also when the option cannot be read)
    """

    userhome = os.path.expanduser("~")
    try:
        suffix = os.path.expandvars(config[inisection][configparampathsuffix])
        candidate = os.path.join(userhome, suffix)
        if not os.path.isdir(candidate):
            candidate = ""
    except Exception:
        # best effort: a missing section/option simply yields "no path"
        candidate = ""

    return candidate

close_log_closepopup(webdriver: utils_seleniumxp._RemoteWebDriver) -> None

close_log_closepopup - close logger for logging close popups

Parameters:

Name Type Description Default
webdriver _RemoteWebDriver

webdriver object

required
Source code in src/utils_seleniumxp/sessionhandling.py
def close_log_closepopup(webdriver: utils_seleniumxp._RemoteWebDriver) -> None:
    """
    close_log_closepopup - close logger for logging close popups

    Closes every handler of the logger stored in the webdriver's
    'closepopup_logger' attribute. Does nothing if the attribute is
    missing or None.

    Args:
        webdriver (utils_seleniumxp._RemoteWebDriver): webdriver object
    """

    popup_logger = getattr(webdriver, "closepopup_logger", None)
    if popup_logger is None:
        return

    for log_handler in popup_logger.handlers:
        log_handler.close()

close_log_sessionstart(sessionstart_logger: Optional[logging.Logger]) -> None

close_log_sessionstart - close logger for logging session start

Parameters:

Name Type Description Default
sessionstart_logger Optional[Logger]

logger

required
Source code in src/utils_seleniumxp/sessionhandling.py
def close_log_sessionstart(sessionstart_logger: Optional[logging.Logger]) -> None:
    """
    close_log_sessionstart - close logger for logging session start

    Closes every handler attached to the given logger; None is accepted
    and ignored.

    Args:
        sessionstart_logger (Optional[logging.Logger]): logger
    """

    if sessionstart_logger is None:
        return

    for log_handler in sessionstart_logger.handlers:
        log_handler.close()

connectChrome(debugport: int, inifile: Optional[str] = None, inisection: str = 'DEFAULT', mixin: bool = utils_seleniumxp.mixinactive, eventlistener: Optional[utils_seleniumxp.AbstractEventListener] = None, URL: str = 'about:blank') -> Optional[utils_seleniumxp._RemoteWebDriver]

connectChrome - connect to Chrome instance via debugport

Parameters:

Name Type Description Default
debugport int

assumed debug port of Chrome instance

required
inifile Optional[str]

INI file. Defaults to None.

None
inisection str

INI section to be evaluated. Defaults to "DEFAULT".

'DEFAULT'
mixin bool

flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.

mixinactive
eventlistener Optional[AbstractEventListenerExtended]

eventlistener object to activate eventfiring mode. Defaults to None.

None
URL str

start URL. Defaults to "about:blank".

'about:blank'

Returns:

Type Description
Optional[_RemoteWebDriver]

utils_seleniumxp.WebDriver: webdriver object, or None if the debug port does not connect to a browser

Source code in src/utils_seleniumxp/sessionhandling.py
def connectChrome(
    debugport: int,
    inifile: Optional[str] = None,
    inisection: str = "DEFAULT",
    mixin: bool = utils_seleniumxp.mixinactive,
    eventlistener: Optional[utils_seleniumxp.AbstractEventListener] = None,
    URL: str = "about:blank"
) -> Optional[utils_seleniumxp._RemoteWebDriver]:
    """
    connectChrome - camelCase alias of connect_chrome

    All arguments are handed to connect_chrome unchanged.

    Args:
        debugport (int): assumed debug port of Chrome instance
        inifile (Optional[str], optional): INI file. Defaults to None.
        inisection (str, optional): INI section to be evaluated. Defaults to "DEFAULT".
        mixin (bool, optional): flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.
        eventlistener (Optional[utils_seleniumxp.AbstractEventListener], optional): eventlistener object to activate eventfiring mode. Defaults to None.
        URL (str, optional): start URL. Defaults to "about:blank".

    Returns:
        Optional[utils_seleniumxp._RemoteWebDriver]: result of connect_chrome
    """
    return connect_chrome(
        debugport=debugport,
        inifile=inifile,
        inisection=inisection,
        mixin=mixin,
        eventlistener=eventlistener,
        URL=URL
    )

connect_chrome(debugport: int, inifile: Optional[str] = None, inisection: str = 'DEFAULT', mixin: bool = utils_seleniumxp.mixinactive, eventlistener: Optional[utils_seleniumxp.AbstractEventListener] = None, URL: str = 'about:blank') -> Optional[utils_seleniumxp._RemoteWebDriver]

connect_chrome - connect to Chrome instance via debugport

Parameters:

Name Type Description Default
debugport int

assumed debug port of Chrome instance

required
inifile Optional[str]

INI file. Defaults to None.

None
inisection str

INI section to be evaluated. Defaults to "DEFAULT".

'DEFAULT'
mixin bool

flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.

mixinactive
eventlistener Optional[AbstractEventListenerExtended]

eventlistener object to activate eventfiring mode. Defaults to None.

None
URL str

start URL. Defaults to "about:blank".

'about:blank'

Returns:

Type Description
Optional[_RemoteWebDriver]

utils_seleniumxp.WebDriver: webdriver object, or None if the debug port does not connect to a browser

Source code in src/utils_seleniumxp/sessionhandling.py
def connect_chrome(
    debugport: int,
    inifile: Optional[str] = None,
    inisection: str = "DEFAULT",
    mixin: bool = utils_seleniumxp.mixinactive,
    eventlistener: Optional[utils_seleniumxp.AbstractEventListener] = None,
    URL: str = "about:blank"
) -> Optional[utils_seleniumxp._RemoteWebDriver]:
    """
    connect_chrome - connect to Chrome instance via debugport

    Steps: probe the debug port, read the config file, on Windows optionally
    locate the driver binary and append its directory to PATH, create a Chrome
    webdriver attached to "localhost:<debugport>", set up the closepopup
    logging, open the start URL and register cleanup handlers via atexit.

    Args:
        debugport (int): assumed debug port of Chrome instance
        inifile (Optional[str], optional): INI file. Defaults to None.
        inisection (str, optional): INI section to be evaluated. Defaults to "DEFAULT".
        mixin (bool, optional): flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.
        eventlistener (Optional[utils_seleniumxp.AbstractEventListener], optional): eventlistener object to activate eventfiring mode. Defaults to None.
        URL (str, optional): start URL. Defaults to "about:blank".

    Returns:
        Optional[utils_seleniumxp._RemoteWebDriver]: webdriver object, or None
        if check_debugport reports that the port does not connect to a browser

    Raises:
        utils_seleniumxp.ErrorUtilsSelenium: on Windows with Selenium Manager
            disabled, if the driver binary is found in none of the checked locations
    """

    # check if debugport active with browser instance
    if not check_debugport(debugport):
        return None

    browser = "chrome"

    # read config file (standard: selenium.ini)
    config, config_without_default = read_configfile(inifile)

    driverpath: Optional[str] = None

    # check binaries from config (Windows only)
    if os.name == "nt":

        # use Selenium Manager as per default
        # NOTE(review): the lookup below raises KeyError if the option is missing,
        # unless read_configfile guarantees it - confirm.
        # if not config.getboolean(browser, "use_seleniummanager"):
        if not Utils.to_bool(config[browser]["use_seleniummanager"]):
            # check/set webdriver executable path
            # lookup order: profile section -> browser section (incl. working
            # directory) -> user path suffix; first non-empty result wins
            driverbinary: str = supported_browsers[browser][1]
            driverpath = check_configpath(config_without_default, inisection, "driverpath", driverbinary, checkwd=False)
            if driverpath == "":
                driverpath = check_configpath(config, browser, "driverpath", driverbinary, checkwd=True)
            if driverpath == "":
                driverpath = check_usrpath(config, browser, "usrpath_suffix_driver")
            if driverpath == "":
                err_msg = f"Driver {driverbinary} for Browser '{browser}' could not be found in path defined in config file or current working directory."
                raise utils_seleniumxp.ErrorUtilsSelenium(err_msg)
            else:
                # set PATH
                # NOTE(review): this is a substring test on the whole PATH string,
                # not a comparison against the individual PATH entries.
                envpath = os.environ["PATH"]
                if driverpath not in envpath:  # type: ignore[operator]
                    os.environ["PATH"] += os.pathsep + driverpath  # type: ignore[operator]

    # create browser options / profile object and set preferences
    browsersettings = utils_seleniumxp.WebDriver.ChromeOptions()
    # connect to existing browser instance via debug-port (chrome only)
    browsersettings.add_experimental_option("debuggerAddress", f"localhost:{debugport}")

    # instantiate Selenium webdriver, load blank page
    # NOTE(review): because of the 'or', an explicit mixin=False is overridden
    # whenever utils_seleniumxp.mixinactive is truthy - confirm this is intended.
    if mixin or utils_seleniumxp.mixinactive:
        webdriver = utils_seleniumxp.webdriver_addon.WebDriverMixedin(utils_seleniumxp.WebDriver.Chrome)(
            options=browsersettings
        )
    else:
        webdriver = utils_seleniumxp.webdriver_addon.WebDriverMixedinOnly3rdParty(utils_seleniumxp.WebDriver.Chrome)(
            options=browsersettings
        )
    # wrap the driver for event firing only if a listener was supplied
    if eventlistener is not None:
        if mixin or utils_seleniumxp.mixinactive:
            webdriver = utils_seleniumxp.eventfiring_addon.EventFiringWebDriverExtended(webdriver, eventlistener)  # type: ignore[assignment]
        else:
            webdriver = utils_seleniumxp.webdriver_addon.EventFiringWebDriverExtendedMixedin(webdriver, eventlistener)  # type: ignore[assignment]
    # webdriver.get("about:blank")

    # set logging for closepopup
    set_log_closepopup(webdriver, config, inisection)

    # get URL (the default "about:blank" triggers no navigation at all)
    if URL != "about:blank":
        webdriver.get(URL)

    # make sure closepopup log is closed when application ends
    atexit.register(close_log_closepopup, webdriver)
    # make sure webdriver process(es) is/are killed when application ends
    # -> does not need to happen as chromedriver.exe of calling application might be killed?
    # atexit.register(os.kill, webdriver.service.process.pid, signal.SIGTERM)
    atexit.register(kill_driver_processes, browser, webdriver.service.process.pid)

    return webdriver

directdownload(browser: str, defaultdir: Optional[str]) -> Union[dict, list, None]

directdownload - create settings data object with config/options-settings for direct download

Parameters:

Name Type Description Default
browser str

browser

required
defaultdir str

target directory. If None is passed, the system temporary directory (tempfile.gettempdir()) is used.

required

Returns:

Type Description
Union[dict, list, None]

Union[dict, list, None]: settings data object

Source code in src/utils_seleniumxp/sessionhandling.py
def directdownload(browser: str, defaultdir: Optional[str]) -> Union[dict, list, None]:
    """
    directdownload - create settings data object with config/options-settings for direct download

    Args:
        browser (str): browser, must be a key of supported_browsers
        defaultdir (Optional[str]): target directory; None selects the
            system temporary directory (tempfile.gettempdir())

    Returns:
        Union[dict, list, None]: dict of preferences for "chrome", list of
        (name, value) tuples for "firefox", None for any other supported browser

    Raises:
        utils_seleniumxp.ErrorUtilsSelenium: if browser is not supported
    """

    if browser not in supported_browsers:
        raise utils_seleniumxp.ErrorUtilsSelenium(f"Browser '{browser}' not supported.")

    targetdir = tempfile.gettempdir() if defaultdir is None else defaultdir

    if browser == "firefox":
        return [
            ("browser.download.folderList", 2),
            ("browser.download.dir", targetdir),
            ("browser.download.manager.showWhenStarting", False),
            ("browser.helperApps.alwaysAsk.force", False),
            ("browser.helperApps.neverAsk.saveToDisk", "application/pdf,text/csv"),
            ("pdfjs.disabled", True)
        ]

    if browser == "chrome":
        return {
            "download.default_directory": targetdir,
            "download.prompt_for_download": False,
            "plugins.always_open_pdf_externally": True
        }

    return None

disablenotifications(browser: str) -> Union[dict, list, None]

disablenotifications - create settings data object with config/options-settings for disabling notifications

Parameters:

Name Type Description Default
browser str

browser

required

Returns:

Type Description
Union[dict, list, None]

Union[dict, list, None]: settings data object

Source code in src/utils_seleniumxp/sessionhandling.py
def disablenotifications(browser: str) -> Union[dict, list, None]:
    """
    disablenotifications - create settings data object with config/options-settings for disabling notifications

    Args:
        browser (str): browser, must be a key of supported_browsers

    Returns:
        Union[dict, list, None]: dict of preferences for "chrome", list of
        (name, value) tuples for "firefox", None for any other supported browser

    Raises:
        utils_seleniumxp.ErrorUtilsSelenium: if browser is not supported
    """

    if browser not in supported_browsers:
        raise utils_seleniumxp.ErrorUtilsSelenium(f"Browser '{browser}' not supported.")

    if browser == "firefox":
        return [
            ("dom.webnotifications.enabled", False),
        ]

    if browser == "chrome":
        return {
            "profile.managed_default_content_settings.notifications": 2
        }

    return None

initWebDriver(browser: Optional[str] = None, inifile: Optional[str] = None, inisection: str = 'DEFAULT', settings: list[Union[tuple[str], tuple[str, list[Any]]]] = [('disablenotifications',), ('directdownload', [tempfile.gettempdir()])], debugport: int = 0, implicitlywait: int = 10, maxpageload: int = 30, mixin: bool = utils_seleniumxp.mixinactive, eventlistener: Optional[utils_seleniumxp.eventfiring_addon.AbstractEventListenerExtended] = None, stealthmode: bool = stealthmode_default, optimizedscraping: bool = optimizedscraping_default, URL: str = 'about:blank', alt_cls_webdriverwrapper: Optional[utils_seleniumxp._RemoteWebDriver] = None, alt_cls_options: Union[utils_seleniumxp.WebDriver.ChromeOptions, utils_seleniumxp.WebDriver.FirefoxOptions, None] = None) -> utils_seleniumxp._RemoteWebDriver

initWebDriver - initialize Selenium webdriver session

INI file: The ini file provides a possibility to define profiles with extensions to be loaded and paths to the webdriver binary and extension files. A profile section defines the browser, the driver path, the extension path and the extensions to be loaded. Config file interpolation might be used for paths. Per driver a driver specific section is required defining at least the file for the extensions. Currently settings beyond paths and extensions are not supported.

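settings: settings is provided as a list of tuples. Each tuple holds the name of a settings function and, optionally, a list of arguments for it (see the default value). The named functions must return a browser-specific settings data object. Refer to functions 'directdownload', 'disablenotifications' and 'optimizedscraping' as examples.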

alternative base class: To use special bases like undetected_chromedriver it is possible to provide an alternative browser class. Note that an alternative options class must be provided as well.

Parameters:

Name Type Description Default
browser Optional[str]

browser, read from INI file if not set. Defaults to None.

None
inifile Optional[str]

INI file. Defaults to None.

None
inisection str

INI section to be evaluated. Defaults to "DEFAULT".

'DEFAULT'
settings list[Union[tuple[str], tuple[str, list[Any]]]]

Browser preference setting functions. Defaults to [("disablenotifications",), ("directdownload", [tempfile.gettempdir()])].

[('disablenotifications',), ('directdownload', [gettempdir()])]
debugport int

set debugport (currently Chrome only). Defaults to 0.

0
implicitlywait int

implicit wait time. Defaults to 10.

10
maxpageload int

max pageload time. Defaults to 30.

30
mixin bool

flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.

mixinactive
eventlistener Optional[AbstractEventListenerExtended]

eventlistener object to activate eventfiring mode. Defaults to None.

None
stealthmode bool

flag to control selenium_stealth mode. Defaults to stealthmode_default.

stealthmode_default
optimizedscraping bool

flag to control optimized settings for scraping (no pictures etc.). Defaults to optimizedscraping_default.

optimizedscraping_default
URL str

start URL. Defaults to "about:blank".

'about:blank'
alt_cls_webdriverwrapper Optional[WebDriver]

optional base webdriver class. Defaults to None.

None
alt_cls_options Union[ChromeOptions, FirefoxOptions, None]

optional browser options class. Defaults to None.

None

Returns:

Type Description
_RemoteWebDriver

utils_seleniumxp.WebDriver: webdriver object

Source code in src/utils_seleniumxp/sessionhandling.py
def initWebDriver(
    browser: Optional[str] = None,
    inifile: Optional[str] = None,
    inisection: str = "DEFAULT",
    settings: list[Union[tuple[str], tuple[str, list[Any]]]] = [("disablenotifications",), ("directdownload", [tempfile.gettempdir()])],
    debugport: int = 0,
    implicitlywait: int = 10,
    maxpageload: int = 30,
    mixin: bool = utils_seleniumxp.mixinactive,
    eventlistener: Optional[utils_seleniumxp.eventfiring_addon.AbstractEventListenerExtended] = None,
    stealthmode: bool = stealthmode_default,
    optimizedscraping: bool = optimizedscraping_default,
    URL: str = "about:blank",
    alt_cls_webdriverwrapper: Optional[utils_seleniumxp._RemoteWebDriver] = None,
    alt_cls_options: Union[utils_seleniumxp.WebDriver.ChromeOptions, utils_seleniumxp.WebDriver.FirefoxOptions, None] = None
) -> utils_seleniumxp._RemoteWebDriver:
    """
    initWebDriver - camelCase alias of init_webdriver

    All arguments are handed to init_webdriver unchanged; refer to that
    function for the description of the INI file, the settings list and the
    alternative base classes.

    Args:
        browser (Optional[str], optional): browser, read from INI file if not set. Defaults to None.
        inifile (Optional[str], optional): INI file. Defaults to None.
        inisection (str, optional): INI section to be evaluated. Defaults to "DEFAULT".
        settings (list[Union[tuple[str], tuple[str, list[Any]]]], optional): Browser preference setting functions. Defaults to [("disablenotifications",), ("directdownload", [tempfile.gettempdir()])].
        debugport (int, optional): set debugport (currently Chrome only). Defaults to 0.
        implicitlywait (int, optional): implicit wait time. Defaults to 10.
        maxpageload (int, optional): max pageload time. Defaults to 30.
        mixin (bool, optional): flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.
        eventlistener (Optional[utils_seleniumxp.eventfiring_addon.AbstractEventListenerExtended], optional): eventlistener object to activate eventfiring mode. Defaults to None.
        stealthmode (bool, optional): flag to control selenium_stealth mode. Defaults to stealthmode_default.
        optimizedscraping (bool, optional): flag to control optimized settings for scraping (no pictures etc.). Defaults to optimizedscraping_default.
        URL (str, optional): start URL. Defaults to "about:blank".
        alt_cls_webdriverwrapper (Optional[utils_seleniumxp._RemoteWebDriver], optional): optional base webdriver class. Defaults to None.
        alt_cls_options (Union[utils_seleniumxp.WebDriver.ChromeOptions, utils_seleniumxp.WebDriver.FirefoxOptions, None], optional): optional browser options class. Defaults to None.

    Returns:
        utils_seleniumxp._RemoteWebDriver: webdriver object returned by init_webdriver
    """
    return init_webdriver(
        browser=browser,
        inifile=inifile,
        inisection=inisection,
        settings=settings,
        debugport=debugport,
        implicitlywait=implicitlywait,
        maxpageload=maxpageload,
        mixin=mixin,
        eventlistener=eventlistener,
        stealthmode=stealthmode,
        optimizedscraping=optimizedscraping,
        URL=URL,
        alt_cls_webdriverwrapper=alt_cls_webdriverwrapper,
        alt_cls_options=alt_cls_options
    )

init_webdriver(browser: Optional[str] = None, inifile: Optional[str] = None, inisection: str = 'DEFAULT', settings: list[Union[tuple[str], tuple[str, list[Any]]]] = [('disablenotifications',), ('directdownload', [tempfile.gettempdir()])], debugport: int = 0, implicitlywait: int = 10, maxpageload: int = 30, mixin: bool = utils_seleniumxp.mixinactive, eventlistener: Optional[utils_seleniumxp.eventfiring_addon.AbstractEventListenerExtended] = None, stealthmode: bool = stealthmode_default, optimizedscraping: bool = optimizedscraping_default, URL: str = 'about:blank', alt_cls_webdriverwrapper: Optional[type[utils_seleniumxp._RemoteWebDriver]] = None, alt_cls_options: Union[type[utils_seleniumxp.WebDriver.ChromeOptions], type[utils_seleniumxp.WebDriver.FirefoxOptions], None] = None) -> utils_seleniumxp._RemoteWebDriver

init_webdriver - initialize Selenium webdriver session

INI file: The ini file provides a possibility to define profiles with extensions to be loaded and paths to the webdriver binary and extension files. A profile section defines the browser, the driver path, the extension path and the extensions to be loaded. Config file interpolation might be used for paths. Per driver a driver specific section is required defining at least the file for the extensions. Currently settings beyond paths and extensions are not supported.

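settings: settings is provided as a list of tuples. Each tuple holds the name of a settings function and, optionally, a list of arguments for it (see the default value). The named functions must return a browser-specific settings data object. Refer to functions 'directdownload', 'disablenotifications' and 'optimizedscraping' as examples.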

alternative base class: To use special bases like undetected_chromedriver it is possible to provide an alternative browser class. Note that an alternative options class must be provided as well.

Parameters:

Name Type Description Default
browser Optional[str]

browser, read from INI file if not set. Defaults to None.

None
inifile Optional[str]

INI file. Defaults to None.

None
inisection str

INI section to be evaluated. Defaults to "DEFAULT".

'DEFAULT'
settings list[Union[tuple[str], tuple[str, list[Any]]]]

Browser preference setting functions. Defaults to [("disablenotifications",), ("directdownload", [tempfile.gettempdir()])].

[('disablenotifications',), ('directdownload', [gettempdir()])]
debugport int

set debugport (currently Chrome only). Defaults to 0.

0
implicitlywait int

implicit wait time. Defaults to 10.

10
maxpageload int

max pageload time. Defaults to 30.

30
mixin bool

flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.

mixinactive
eventlistener Optional[AbstractEventListenerExtended]

eventlistener object to activate eventfiring mode. Defaults to None.

None
stealthmode bool

flag to control selenium_stealth mode. Defaults to stealthmode_default.

stealthmode_default
optimizedscraping bool

flag to control optimized settings for scraping (no pictures etc.). Defaults to optimizedscraping_default.

optimizedscraping_default
URL str

start URL. Defaults to "about:blank".

'about:blank'
alt_cls_webdriverwrapper Optional[WebDriver]

optional base webdriver class. Defaults to None.

None
alt_cls_options Union[ChromeOptions, FirefoxOptions, None]

optional browser options class. Defaults to None.

None

Returns:

Type Description
_RemoteWebDriver

utils_seleniumxp.WebDriver: webdriver object

Source code in src/utils_seleniumxp/sessionhandling.py
def init_webdriver(
    browser: Optional[str] = None,
    inifile: Optional[str] = None,
    inisection: str = "DEFAULT",
    settings: list[Union[tuple[str], tuple[str, list[Any]]]] = [("disablenotifications",), ("directdownload", [tempfile.gettempdir()])],
    debugport: int = 0,
    implicitlywait: int = 10,
    maxpageload: int = 30,
    mixin: bool = utils_seleniumxp.mixinactive,
    eventlistener: Optional[utils_seleniumxp.eventfiring_addon.AbstractEventListenerExtended] = None,
    stealthmode: bool = stealthmode_default,
    optimizedscraping: bool = optimizedscraping_default,
    URL: str = "about:blank",
    alt_cls_webdriverwrapper: Optional[type[utils_seleniumxp._RemoteWebDriver]] = None,
    alt_cls_options: Union[type[utils_seleniumxp.WebDriver.ChromeOptions], type[utils_seleniumxp.WebDriver.FirefoxOptions], None] = None
) -> utils_seleniumxp._RemoteWebDriver:
    """
    init_webdriver - initialize Selenium webdriver session

    INI file:
    The ini file provides a possibility to define profiles with extensions to be loaded and paths
    to the webdriver binary and extension files. A profile section defines the browser, the driver path,
    the extension path and the extensions to be loaded. Config file interpolation might be used for paths.
    Per driver a driver specific section is required defining at least the file for the extensions.
    Currently settings beyond paths and extensions are not supported.

    settings:
    settings is provided as a list of tuples. Each tuple holds the name of a settings function of this
    module, optionally followed by a list of additional arguments. The function is called with the
    browser as first argument and must return a browser-specific settings data object.
    Refer to functions 'directdownload', 'disablenotifications' and 'optimizedscraping' as examples.
    The list passed in is never modified; the function works on a copy.

    alternative base class:
    To use special bases like undetected_chromedriver it is possible to provide an alternative
    browser class. Note that an alternative options class must be provided as well.

    Args:
        browser (Optional[str], optional): browser, read from INI file if not set. Defaults to None.
        inifile (Optional[str], optional): INI file. Defaults to None.
        inisection (str, optional): INI section to be evaluated. Defaults to "DEFAULT".
        settings (list[Union[tuple[str], tuple[str, list[Any]]]], optional): Browser preference setting functions. Defaults to [("disablenotifications",), ("directdownload", [tempfile.gettempdir()])].
        debugport (int, optional): set debugport (currently Chrome only). Defaults to 0.
        implicitlywait (int, optional): implicit wait time. Defaults to 10.
        maxpageload (int, optional): max pageload time. Defaults to 30.
        mixin (bool, optional): flag to switch between mixin and setattr mode. Defaults to utils_seleniumxp.mixinactive.
        eventlistener (Optional[utils_seleniumxp.eventfiring_addon.AbstractEventListenerExtended], optional): eventlistener object to activate eventfiring mode. Defaults to None.
        stealthmode (bool, optional): flag to control selenium_stealth mode. Defaults to stealthmode_default.
        optimizedscraping (bool, optional): flag to control optimized settings for scraping (no pictures etc.). Defaults to optimizedscraping_default.
        URL (str, optional): start URL. Defaults to "about:blank".
        alt_cls_webdriverwrapper (Optional[utils_seleniumxp.WebDriver], optional): optional base webdriver class. Defaults to None.
        alt_cls_options (Union[utils_seleniumxp.WebDriver.ChromeOptions, utils_seleniumxp.WebDriver.FirefoxOptions, None], optional): optional browser options class. Defaults to None.

    Returns:
        utils_seleniumxp.WebDriver: webdriver object

    Raises:
        utils_seleniumxp.ErrorUtilsSelenium: if the browser is not supported, a required binary,
            driver or extensions path cannot be resolved, or a settings function cannot be evaluated
    """

    # internal helper for session init routine

    def evaluate_prefsfunction(prefsfunction: Any) -> Union[dict, list[tuple[str, Any]]]:
        # normalize the entry to a (function name, argument list or None) pair
        if isinstance(prefsfunction, tuple):
            if len(prefsfunction) == 1:
                prefsfunction = (prefsfunction[0], None)
        else:
            prefsfunction = (prefsfunction, None)
        try:
            # settings functions are looked up by name among the module globals
            if prefsfunction[1] is None:
                return globals()[prefsfunction[0]](browser)
            else:
                return globals()[prefsfunction[0]](browser, *prefsfunction[1])
        except Exception as exc_prefscfuntion_eval:
            err_msg = f"Browser preference set {prefsfunction[0]} not defined."
            raise utils_seleniumxp.ErrorUtilsSelenium(err_msg) from exc_prefscfuntion_eval

    def evaluate_extensionspath(evalfunc: Callable[[str], None], extensionspath: str) -> int:
        # hand every configured and existing extension file to evalfunc, return the number handed over
        extensions_installed = 0
        if "extensions" in config[inisection]:
            extensions = config[inisection]["extensions"].split(", ")
            if len(extensions) > 0:
                if sessionstartlog is not None:
                    sessionstartlog.info("Add-Ons:")
                for extensionID in extensions:
                    # get-function used to enable fallback value
                    extensionfile = config[browser].get("extension_" + extensionID, fallback="")  # type: ignore[index]
                    if extensionfile != "":
                        extensionfile = os.path.join(extensionspath, extensionfile)
                        # extensionfile = pathlib.Path(extensionspath).joinpath(extensionfile)
                        # if extensionfile.is_file():
                        if os.path.isfile(extensionfile):
                            evalfunc(extensionfile)
                            if sessionstartlog is not None:
                                sessionstartlog.info(f"- '{extensionfile}' for '{extensionID}' found.")
                            extensions_installed += 1
                        elif sessionstartlog is not None:
                            sessionstartlog.info(f"- File '{extensionfile}' for '{extensionID}' not found.")
                    elif sessionstartlog is not None:
                        sessionstartlog.info(f"- Extensionfile for '{extensionID}' not defined.")
        return extensions_installed

    # start ini processing

    regkey_AppPaths: str = "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\"

    # read config file (standard: seleniumpythonutils.ini)
    config, config_without_default = read_configfile(inifile)

    # activate sessionstart log
    sessionstartlog = set_log_sessionstart(config, inisection)
    if sessionstartlog is not None:
        sessionstartlog.info("----- start webdriver session -----")
        sessionstartlog.info("Parameters:")
        # get arg-values without reference to callable
        cframe = inspect.currentframe()
        args_info = inspect.getargvalues(cframe)
        # collecting args-values in a dictionary
        argsdict = {arg: args_info.locals.get(arg) for arg in args_info.args}
        # log args dictionary
        for key, value in argsdict.items():
            sessionstartlog.info(f"- {key}: {value}")

    # read browser
    if browser is None:
        browser = config[inisection]["browser"]
    # check browser
    if browser not in supported_browsers:
        err_msg = f"Browser '{browser}' not supported."
        raise utils_seleniumxp.ErrorUtilsSelenium(err_msg)

    browserbinarypath: Optional[str] = None
    driverpath: Optional[str] = None

    # check binaries from config (Windows only)
    if os.name == "nt":

        # check browser binary location (for portable installations - only windows)
        import winreg
        browserbinary: str = supported_browsers[browser][0]
        browserbinarypath = check_configpath(config, browser, "browserbinarypath", browserbinary, checkwd=False)
        if browserbinarypath is None or browserbinarypath == "":
            try:
                winreg.QueryValue(winreg.HKEY_LOCAL_MACHINE, regkey_AppPaths + browserbinary)
            except Exception as exc_winreg:
                err_msg = f"Browser '{browser}' not installed and no portable app path provided."
                raise utils_seleniumxp.ErrorUtilsSelenium(err_msg) from exc_winreg

        # use Selenium Manager as per default
        # if not config.getboolean(browser, "use_seleniummanager"):
        if not Utils.to_bool(config[browser]["use_seleniummanager"]):
            # check/set webdriver executable path
            driverbinary: str = supported_browsers[browser][1]
            driverpath = check_configpath(config_without_default, inisection, "driverpath", driverbinary, checkwd=False)
            if driverpath == "":
                driverpath = check_configpath(config, browser, "driverpath", driverbinary, checkwd=True)
            if driverpath == "":
                driverpath = check_usrpath(config, browser, "usrpath_suffix_driver")
            if driverpath is None or driverpath == "":
                err_msg = f"Driver {driverbinary} for Browser '{browser}' could not be found in path defined in config file or current working directory."
                raise utils_seleniumxp.ErrorUtilsSelenium(err_msg)
            else:
                # set PATH
                envpath = os.environ["PATH"]
                if driverpath not in envpath:
                    os.environ["PATH"] += os.pathsep + driverpath

    else:

        # NOTE(review): 'browserbinary' is only assigned in the Windows branch above, so the
        # binary_location assignment further down raises NameError on other platforms whenever
        # this lookup succeeds - confirm the intended non-Windows path layout before fixing
        # browserbinarypath = check_configpath(config, browser, "", "browserbinarypath", checkwd=False)
        browserbinarypath = config[browser]["browserbinarypath"]

    # optimized scraping (bot detection prevention)
    # work on a private copy: appending to the argument itself would permanently extend the shared
    # default list (affecting every later call) as well as any list owned by the caller
    settings = list(settings)
    if optimizedscraping and not any(
        entry == "optimizedscraping"
        or (isinstance(entry, tuple) and len(entry) > 0 and entry[0] == "optimizedscraping")
        for entry in settings
    ):
        settings.append(("optimizedscraping",))

    # create browser options / profile object and set preferences
    if browser == "chrome":

        # set options object - some wrappers define own options class
        browsersettings = utils_seleniumxp.WebDriver.ChromeOptions() if alt_cls_options is None else alt_cls_options()
        assert isinstance(browsersettings, utils_seleniumxp.WebDriver.ChromeOptions)

        # options / preferences
        prefsdict: dict = {}
        for prefsfunction in settings:
            prefs = evaluate_prefsfunction(prefsfunction)
            prefsdict = {**prefsdict, **prefs}  # type: ignore[dict-item]
        if len(prefsdict) > 0:
            browsersettings.add_experimental_option("prefs", prefsdict)
            if sessionstartlog is not None:
                sessionstartlog.info("Preferences / Options:")
                for key, value in prefsdict.items():
                    # log Chrome prefs
                    sessionstartlog.info(f"- {key}: {value}")

        # settings for Chrome
        browsersettings.add_argument("--disable-features=UserAgentClientHint,OptimizationGuideModelDownloading,OptimizationHintsFetching,OptimizationTargetPrediction,OptimizationHints")
        browsersettings.add_argument("--disable-search-engine-choice-screen")
        # settings in addition to stealth extension from stackoverflow
        # https://stackoverflow.com/questions/53039551/selenium-webdriver-modifying-navigator-webdriver-flag-to-prevent-selenium-detec
        # https://stackoverflow.com/questions/67341346/how-to-bypass-cloudflare-bot-protection-in-selenium
        if stealthmode:
            browsersettings.add_experimental_option("excludeSwitches", ["enable-automation"])
            browsersettings.add_experimental_option("useAutomationExtension", False)
            browsersettings.add_argument("--disable-blink-features=AutomationControlled")

        # remote debugging (currently only Chrome)
        if debugport != 0:
            browsersettings.add_argument(f"--remote-debugging-port={debugport}")

    elif browser == "firefox":

        # set options object - some wrappers define own options class
        browsersettings = utils_seleniumxp.WebDriver.FirefoxOptions() if alt_cls_options is None else alt_cls_options()
        # check against the same options class that is instantiated above (parallel to the Chrome branch)
        assert isinstance(browsersettings, utils_seleniumxp.WebDriver.FirefoxOptions)

        # options / preferences
        if len(settings) > 0:
            if sessionstartlog is not None:
                sessionstartlog.info("Preferences / Options:")
            for prefsfunction in settings:
                prefs = evaluate_prefsfunction(prefsfunction)
                for pref in prefs:
                    browsersettings.set_preference(pref[0], pref[1])
                    # log FireFox prefs
                    if sessionstartlog is not None:
                        sessionstartlog.info(f"- {pref[0]}: {pref[1]}")

        # does not work anymore with Selenium 4.x
        # browsersettings.set_preference("xpinstall.signatures.required", False)   # for unsigned extensions

        # potentially switch to options.profile to be amended (Selenium 4)

        # settings for Firefox
        # settings in addition to stealth extension from stackoverflow (currently Chrome only)
        if stealthmode:
            pass

        # remote debugging (currently Chrome only)
        if debugport != 0:
            pass

    # set browser binary location in webdriver/browser profile
    if browserbinarypath is not None:
        browsersettings.binary_location = os.path.join(browserbinarypath, browserbinary)

    # install requested extensions
    # Selenium 3.x: not working properly with FireFox but no error message
    # Selenium 4.x: sub-routine evaluate as Chrome includes add-on in options but firefox requires webdriver start first
    extensionspath = check_configpath(config_without_default, inisection, "extensionspath")
    if extensionspath == "":
        extensionspath = check_configpath(config, browser, "extensionspath")
    if extensionspath == "":
        extensionspath = check_usrpath(config, browser, "usrpath_suffix_ext")
    if extensionspath == "":
        err_msg = f"Extensions for Browser '{browser}' could not be found in path defined in config file."
        raise utils_seleniumxp.ErrorUtilsSelenium(err_msg)

    # install requested extensions - Chrome
    if browser == "chrome" and extensionspath != "":

        assert isinstance(browsersettings, utils_seleniumxp.WebDriver.ChromeOptions)
        extensions_installed = evaluate_extensionspath(
            browsersettings.add_extension, extensionspath
        )
        # fix according to https://github.com/SeleniumHQ/selenium/issues/15788
        if extensions_installed > 0:
            browsersettings.add_argument("--disable-features=DisableLoadExtensionCommandLineSwitch")

    # instantiate Selenium webdriver + browser, load blank page
    # an alternative wrapper class is only accepted if its MRO shows it belongs to the selected browser
    if browser == "chrome":
        cls_webdriver = utils_seleniumxp.WebDriver.Chrome
        if alt_cls_webdriverwrapper is not None:
            if (browser.upper() in [mro_cls.__module__.upper() for mro_cls in alt_cls_webdriverwrapper.__mro__]) or \
                ("selenium.webdriver.chrome.webdriver" in [mro_cls.__module__ for mro_cls in alt_cls_webdriverwrapper.__mro__]):
                cls_webdriver = alt_cls_webdriverwrapper  # type: ignore[assignment]
    elif browser == "firefox":
        cls_webdriver = utils_seleniumxp.WebDriver.Firefox  # type: ignore[assignment]
        if alt_cls_webdriverwrapper is not None:
            if (browser.upper() in [mro_cls.__module__.upper() for mro_cls in alt_cls_webdriverwrapper.__mro__]) or \
                ("selenium.webdriver.firefox.webdriver" in [mro_cls.__module__ for mro_cls in alt_cls_webdriverwrapper.__mro__]):
                cls_webdriver = alt_cls_webdriverwrapper  # type: ignore[assignment]
    if mixin or utils_seleniumxp.mixinactive:
        webdriver = utils_seleniumxp.webdriver_addon.WebDriverMixedin(cls_webdriver)(options=browsersettings)
    else:
        webdriver = utils_seleniumxp.webdriver_addon.WebDriverMixedinOnly3rdParty(cls_webdriver)(options=browsersettings)
    if eventlistener is not None:
        if mixin or utils_seleniumxp.mixinactive:
            webdriver = utils_seleniumxp.webdriver_addon.EventFiringWebDriverExtendedMixedin(webdriver, eventlistener)  # type: ignore[assignment]
        else:
            webdriver = utils_seleniumxp.eventfiring_addon.EventFiringWebDriverExtended(webdriver, eventlistener)  # type: ignore[assignment]
    webdriver.get("about:blank")

    # install requested extensions - Firefox
    if browser == "firefox" and extensionspath != "":
        extensions_installed = evaluate_extensionspath(
            webdriver.install_addon, extensionspath
        )

    # set implicitly wait if defined
    if implicitlywait != 0:
        webdriver.implicitly_wait(implicitlywait)

    # set maxpage load
    if maxpageload != 0:
        webdriver.set_page_load_timeout(maxpageload)

    # close browser windows from extensions
    if extensions_installed > 0:
        webdriver.wait4HTMLstable()
        if len(webdriver.window_handles) > 1:
            while len(webdriver.window_handles) > 1:
                webdriver.close()

    # activate stealth mode (so far only available for chrome)
    if stealthmode:
        # set full screen-size as anti-bot measure -> would require screen-size not to be changed by calling programm
        webdriver.maximize_window()
        # delete  navigator flag
        webdriver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
        # browser-specific stuff
        if browser == "chrome":
            webdriver.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """
                        Object.defineProperty(navigator, 'webdriver', {get: () => undefined})
                            """
                }
            )
            # solve wrapping issue for EventFiringWebDriver
            webdriver_temp = webdriver.wrapped_driver if hasattr(webdriver, "wrapped_driver") else webdriver
            stealth(
                driver=webdriver_temp,
                user_agent="",
                languages=["en-US", "en", "de-DE", "de"],
                vendor="Google Inc.",
                platform="Win32",
                webgl_vendor="Intel Inc.",
                renderer="Intel Iris OpenGL Engine",
                fix_hairline=True,
                run_on_insecure_origins=False
            )

    # get URL
    if URL != "about:blank":
        webdriver.get(URL)

    # close log for session start
    close_log_sessionstart(sessionstartlog)

    # set logging for closing popups
    set_log_closepopup(webdriver, config, inisection)

    # make sure closepopup log is closed when application ends
    atexit.register(close_log_closepopup, webdriver)
    # make sure webdriver process is killed when application ends
    atexit.register(kill_driver_processes, browser, webdriver.service.process.pid)

    return webdriver

kill_driver_processes(browser: str = 'chrome', pid: Optional[int] = None) -> None

kill_driver_processes - routine to kill driver process(es) for automated clean-up

Parameters:

Name Type Description Default
browser str

browser. Defaults to "chrome".

'chrome'
pid Optional[int]

process ID

None
Source code in src/utils_seleniumxp/sessionhandling.py
def kill_driver_processes(browser: str = "chrome", pid: Optional[int] = None) -> None:
    """
    kill_driver_processes - routine to kill driver process(es) for automated clean-up

    With a process ID, SIGTERM is sent to exactly that process. Without a process ID, SIGTERM
    is sent to every running process whose name equals the driver binary of the browser.
    Processes that have already ended are skipped silently.

    Args:
        browser (str, optional): browser. Defaults to "chrome".
        pid (Optional[int], optional): process ID of the driver process. Defaults to None
            (all driver processes of the browser).

    Raises:
        utils_seleniumxp.ErrorUtilsSelenium: if the browser is not supported
    """

    if browser not in supported_browsers:
        err_msg = f"Browser '{browser}' not supported."
        raise utils_seleniumxp.ErrorUtilsSelenium(err_msg)

    if pid is not None:
        # guard is essential: psutil.Process(None) addresses the *current* Python process,
        # so an unguarded call without pid would send SIGTERM to the caller itself
        with contextlib.suppress(psutil.NoSuchProcess):
            psutil.Process(pid).send_signal(signal.SIGTERM)
        return

    driverbinary: str = supported_browsers[browser][1]
    driver_pids: list[int] = []
    try:
        for proc in psutil.process_iter(["name"]):
            if proc.info["name"] == driverbinary:
                driver_pids.append(proc.pid)
    except psutil.Error:
        # best-effort clean-up: continue with the processes collected so far
        pass
    for driver_pid in driver_pids:
        # a driver process may end between collection and signalling
        with contextlib.suppress(psutil.NoSuchProcess):
            psutil.Process(driver_pid).send_signal(signal.SIGTERM)

optimizedscraping(browser: str) -> Union[dict, list, None]

optimizedscraping - create settings data object with config/options-settings for optimized scraping

Parameters:

Name Type Description Default
browser str

browser

required

Returns:

Type Description
Union[dict, list, None]

Union[dict, list, None]: settings data object

Source code in src/utils_seleniumxp/sessionhandling.py
def optimizedscraping(browser: str) -> Union[dict, list, None]:
    """
    optimizedscraping - build the browser-specific settings object for optimized scraping

    Chrome receives a dictionary of profile preferences, Firefox a list of
    (preference name, value) tuples.

    Args:
        browser (str): browser

    Returns:
        Union[dict, list, None]: settings data object

    Raises:
        utils_seleniumxp.ErrorUtilsSelenium: if the browser is not supported
    """

    if browser not in supported_browsers:
        raise utils_seleniumxp.ErrorUtilsSelenium(f"Browser '{browser}' not supported.")

    if browser == "firefox":
        firefox_prefs: list = [
            ("permissions.default.stylesheet", 2),
            ("permissions.default.image", 2),
            ("dom.ipc.plugins.enabled.libflashplayer.so", "false"),
        ]
        return firefox_prefs

    if browser == "chrome":
        chrome_prefs: dict = {
            "profile.default_content_setting_values.notifications": 2,
            "profile.managed_default_content_settings.images": 2,
            "profile.managed_default_content_settings.stylesheets": 2,
            "profile.managed_default_content_settings.cookies": 1,
            "profile.managed_default_content_settings.javascript": 1,
            "profile.managed_default_content_settings.plugins": 1,
            "profile.managed_default_content_settings.popups": 2,
            "profile.managed_default_content_settings.geolocation": 2,
            "profile.managed_default_content_settings.media_stream": 2,
        }
        return chrome_prefs

    # supported browser without a dedicated settings set
    return None

read_configfile(inifile: Optional[str]) -> tuple[configparser.ConfigParser, configparser.ConfigParser]

read_configfile - read config file

Parameters:

Name Type Description Default
inifile Optional[str]

INI file with config data. Defaults to None.

required

Returns:

Type Description
tuple[ConfigParser, ConfigParser]

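tuple[configparser.ConfigParser, configparser.ConfigParser]: configparser object with the regular DEFAULT section handling and configparser object created with default_section=None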

Source code in src/utils_seleniumxp/sessionhandling.py
def read_configfile(inifile: Optional[str]) -> tuple[configparser.ConfigParser, configparser.ConfigParser]:
    """
    read_configfile - read config file

    If no existing file is passed, "SeleniumPythonUtils.ini" is looked up in the user's home
    directory first and in the directory of this module second.

    Args:
        inifile (Optional[str]): INI file with config data.

    Returns:
        tuple[configparser.ConfigParser, configparser.ConfigParser]: parser with the regular
        default section handling, and parser created with default_section=None
    """

    ini_name = "SeleniumPythonUtils.ini"

    # fallback 1: home directory of the user
    if inifile is None or not os.path.isfile(inifile):
        inifile = os.path.join(os.path.expanduser("~"), ini_name)
    # fallback 2: directory of this module
    if not os.path.isfile(inifile):
        inifile = os.path.join(os.path.dirname(__file__), ini_name)

    parser_with_default = configparser.ConfigParser(
        interpolation=configparser.ExtendedInterpolation()
    )
    parser_without_default = configparser.ConfigParser(
        default_section=None,
        interpolation=configparser.ExtendedInterpolation()
    )
    for parser in (parser_with_default, parser_without_default):
        parser.read(inifile)

    return parser_with_default, parser_without_default

set_log_closepopup(webdriver: utils_seleniumxp._RemoteWebDriver, config: configparser.ConfigParser, inisection: str) -> None

set_log_closepopup - initialize logger for logging close popups

Parameters:

Name Type Description Default
webdriver _RemoteWebDriver

webdriver object

required
config ConfigParser

configuration parser object filled from INI file

required
inisection str

INI file section

required
Source code in src/utils_seleniumxp/sessionhandling.py
def set_log_closepopup(webdriver: utils_seleniumxp._RemoteWebDriver, config: configparser.ConfigParser, inisection: str) -> None:
    """
    set_log_closepopup - attach a logger for closed popups to the webdriver object

    The logger is only created if option "log_closepopup" of the INI section evaluates to true;
    it is stored on the webdriver object as attribute "closepopup_logger".

    Args:
        webdriver (utils_seleniumxp._RemoteWebDriver): webdriver object
        config (configparser.ConfigParser): configuration parser object filled from INI file
        inisection (str): INI file section
    """

    # nothing to do if popup logging is switched off for this section
    if not config.getboolean(inisection, "log_closepopup"):
        return

    popup_logfile = config.get(inisection, "log_closepopup_path")
    popup_logger = Utils.initLogger(loggername="Log_ClosePopup", filename=popup_logfile)
    setattr(webdriver, "closepopup_logger", popup_logger)

set_log_sessionstart(config: configparser.ConfigParser, inisection: str) -> Optional[logging.Logger]

set_log_sessionstart - initialize logger for logging of session start

Parameters:

Name Type Description Default
config ConfigParser

configuration parser object filled from INI file

required
inisection str

INI file section

required

Returns:

Type Description
Optional[Logger]

Optional[logging.Logger]: logger object

Source code in src/utils_seleniumxp/sessionhandling.py
def set_log_sessionstart(config: configparser.ConfigParser, inisection: str) -> Optional[logging.Logger]:
    """
    set_log_sessionstart - create the logger for the session start, if activated

    The logger is only created if option "log_sessionstart" of the INI section evaluates to true.

    Args:
        config (configparser.ConfigParser): configuration parser object filled from INI file
        inisection (str): INI file section

    Returns:
        Optional[logging.Logger]: logger object, None if session start logging is switched off
    """

    # session start logging switched off for this section
    if not config.getboolean(inisection, "log_sessionstart"):
        return None

    session_logfile = config.get(inisection, "log_sessionstart_path")
    return Utils.initLogger(loggername="Log_SessionStart", filename=session_logfile)