PokéLLMon 源码解析(五)(2)

本文涉及的产品
公共DNS(含HTTPDNS解析),每月1000万次HTTP解析
云解析 DNS,旗舰版 1个月
全局流量管理 GTM,标准版 1个月
简介: PokéLLMon 源码解析(五)

PokéLLMon 源码解析(五)(1)https://developer.aliyun.com/article/1483726

.\PokeLLMon\poke_env\player\random_player.py

# 定义一个随机玩家基线的模块
"""This module defines a random players baseline
"""

# 从 poke_env.environment 模块中导入 AbstractBattle 类
from poke_env.environment import AbstractBattle
# 从 poke_env.player.battle_order 模块中导入 BattleOrder 类
from poke_env.player.battle_order import BattleOrder
# 从 poke_env.player.player 模块中导入 Player 类
from poke_env.player.player import Player

# 定义一个 RandomPlayer 类,继承自 Player 类
class RandomPlayer(Player):
    # 定义 choose_move 方法,接受一个 AbstractBattle 对象作为参数,返回一个 BattleOrder 对象
    def choose_move(self, battle: AbstractBattle) -> BattleOrder:
        # 调用 Player 类的 choose_random_move 方法选择一个随机的行动
        return self.choose_random_move(battle)

.\PokeLLMon\poke_env\player\utils.py

"""This module contains utility functions and objects related to Player classes.
"""

# 导入必要的模块和对象
import asyncio
import math
from concurrent.futures import Future
from typing import Dict, List, Optional, Tuple

from poke_env.concurrency import POKE_LOOP
from poke_env.data import to_id_str
from poke_env.player.baselines import MaxBasePowerPlayer, SimpleHeuristicsPlayer
from poke_env.player.player import Player
from poke_env.player.random_player import RandomPlayer

# 定义不同玩家类型的评估等级
_EVALUATION_RATINGS = {
    RandomPlayer: 1,
    MaxBasePowerPlayer: 7.665994,
    SimpleHeuristicsPlayer: 128.757145,
}

# 后台交叉评估函数,返回一个 Future 对象
def background_cross_evaluate(
    players: List[Player], n_challenges: int
) -> "Future[Dict[str, Dict[str, Optional[float]]]]":
    return asyncio.run_coroutine_threadsafe(
        cross_evaluate(players, n_challenges), POKE_LOOP
    )

# 异步交叉评估函数
async def cross_evaluate(
    players: List[Player], n_challenges: int
) -> Dict[str, Dict[str, Optional[float]]]:
    # 初始化结果字典
    results: Dict[str, Dict[str, Optional[float]]] = {
        p_1.username: {p_2.username: None for p_2 in players} for p_1 in players
    }
    # 遍历玩家列表,进行交叉评估
    for i, p_1 in enumerate(players):
        for j, p_2 in enumerate(players):
            if j <= i:
                continue
            # 并发发送挑战和接受挑战
            await asyncio.gather(
                p_1.send_challenges(
                    opponent=to_id_str(p_2.username),
                    n_challenges=n_challenges,
                    to_wait=p_2.ps_client.logged_in,
                ),
                p_2.accept_challenges(
                    opponent=to_id_str(p_1.username),
                    n_challenges=n_challenges,
                    packed_team=p_2.next_team,
                ),
            )
            # 更新结果字典中的胜率信息
            results[p_1.username][p_2.username] = p_1.win_rate
            results[p_2.username][p_1.username] = p_2.win_rate

            # 重置战斗状态
            p_1.reset_battles()
            p_2.reset_battles()
    return results

# 从结果中估计实力函数
def _estimate_strength_from_results(
    number_of_games: int, number_of_wins: int, opponent_rating: float
# 估计玩家实力基于游戏结果和对手评分
def evaluate_player(
    number_of_games: int,  # 游戏评估的数量
    number_of_wins: int,  # 赢得的评估游戏数量
    opponent_rating: float,  # 对手的评分
) -> Tuple[float, Tuple[float, float]]:  # 返回估计的玩家实力和95%置信区间的元组

    n, p = number_of_games, number_of_wins / number_of_games  # 计算游戏数量和胜率
    q = 1 - p  # 计算失败率

    if n * p * q < 9:  # 如果无法应用二项分布的正态近似
        raise ValueError(
            "The results obtained in evaluate_player are too extreme to obtain an "
            "accurate player evaluation. You can try to solve this issue by increasing"
            " the total number of battles. Obtained results: %d victories out of %d"
            " games." % (p * n, n)
        )

    estimate = opponent_rating * p / q  # 估计玩家实力
    error = (
        math.sqrt(n * p * q) / n * 1.96
    )  # 95%置信区间的正态分布

    lower_bound = max(0, p - error)  # 下限
    lower_bound = opponent_rating * lower_bound / (1 - lower_bound)  # 下限的评估

    higher_bound = min(1, p + error)  # 上限

    if higher_bound == 1:  # 如果上限为1
        higher_bound = math.inf  # 上限为正无穷大
    else:
        higher_bound = opponent_rating * higher_bound / (1 - higher_bound)  # 上限的评估

    return estimate, (lower_bound, higher_bound)  # 返回估计值和置信区间的元组


# 后台评估玩家
def background_evaluate_player(
    player: Player,  # 玩家对象
    n_battles: int = 1000,  # 战斗数量默认为1000
    n_placement_battles: int = 30,  # 放置战斗数量默认为30
) -> "Future[Tuple[float, Tuple[float, float]]]":  # 返回Future对象,包含估计值和置信区间的元组

    return asyncio.run_coroutine_threadsafe(
        evaluate_player(player, n_battles, n_placement_battles), POKE_LOOP
    )  # 在POKE_LOOP中异步运行评估玩家函数


# 异步评估玩家
async def evaluate_player(
    player: Player,  # 玩家对象
    n_battles: int = 1000,  # 战斗数量默认为1000
    n_placement_battles: int = 30,  # 放置战斗数量默认为30
# 估算玩家实力的函数
def estimate_player_strength(player: Player, n_battles: int, n_placement_battles: int) -> Tuple[float, Tuple[float, float]]:
    """Estimate player strength.

    This functions calculates an estimate of a player's strength, measured as its
    expected performance against a random opponent in a gen 8 random battle. The
    returned number can be interpreted as follows: a strength of k means that the
    probability of the player winning a gen 8 random battle against a random player is k
    times higher than the probability of the random player winning.

    The function returns a tuple containing the best guess based on the played games
    as well as a tuple describing a 95% confidence interval for that estimated strength.

    The actual evaluation can be performed against any baseline player for which an
    accurate strength estimate is available. This baseline is determined at the start of
    the process, by playing a limited number of placement battles and choosing the
    opponent closest to the player in terms of performance.

    :param player: The player to evaluate.
    :type player: Player
    :param n_battles: The total number of battle to perform, including placement
        battles.
    :type n_battles: int
    :param n_placement_battles: Number of placement battles to perform per baseline
        player.
    :type n_placement_battles: int
    :raises: ValueError if the results are too extreme to be interpreted.
    :raises: AssertionError if the player is not configured to play gen8battles or the
        selected number of games to play it too small.
    :return: A tuple containing the estimated player strength and a 95% confidence
        interval
    :rtype: tuple of float and tuple of floats
    """
    # 检查输入
    assert player.format == "gen8randombattle", (
        "Player %s can not be evaluated as its current format (%s) is not "
        "gen8randombattle." % (player, player.format)
    )
    # 如果放置战斗的数量乘以评估等级的数量大于总战斗数量的一半,则进行警告
    if n_placement_battles * len(_EVALUATION_RATINGS) > n_battles // 2:
        player.logger.warning(
            "Number of placement battles reduced from %d to %d due to limited number of"
            " battles (%d). A more accurate evaluation can be performed by increasing "
            "the total number of players.",
            n_placement_battles,
            n_battles // len(_EVALUATION_RATINGS) // 2,
            n_battles,
        )
        # 将放置战斗数量减少到总战斗数量的一半除以评估等级的数量
        n_placement_battles = n_battles // len(_EVALUATION_RATINGS) // 2

    # 断言放置战斗数量大于0,否则抛出异常
    assert (
        n_placement_battles > 0
    ), "Not enough battles to perform placement battles. Please increase the number of "
    "battles to perform to evaluate the player."

    # 初始化放置战斗
    baselines = [p(max_concurrent_battles=n_battles) for p in _EVALUATION_RATINGS]

    # 对每个基准玩家进行放置战斗
    for p in baselines:
        await p.battle_against(player, n_placement_battles)

    # 选择最佳对手进行评估
    best_opp = min(
        baselines, key=lambda p: (abs(p.win_rate - 0.5), -_EVALUATION_RATINGS[type(p)])
    )

    # 执行主要评估
    remaining_battles = n_battles - len(_EVALUATION_RATINGS) * n_placement_battles
    await best_opp.battle_against(player, remaining_battles)

    # 从结果中估计玩家的实力
    return _estimate_strength_from_results(
        best_opp.n_finished_battles,
        best_opp.n_lost_battles,
        _EVALUATION_RATINGS[type(best_opp)],
    )

PokéLLMon 源码解析(五)(3)https://developer.aliyun.com/article/1483728

相关文章
|
22天前
|
缓存 Java 程序员
Map - LinkedHashSet&Map源码解析
Map - LinkedHashSet&Map源码解析
55 0
|
22天前
|
算法 Java 容器
Map - HashSet & HashMap 源码解析
Map - HashSet & HashMap 源码解析
47 0
|
3天前
|
消息中间件 缓存 安全
Future与FutureTask源码解析,接口阻塞问题及解决方案
【11月更文挑战第5天】在Java开发中,多线程编程是提高系统并发性能和资源利用率的重要手段。然而,多线程编程也带来了诸如线程安全、死锁、接口阻塞等一系列复杂问题。本文将深度剖析多线程优化技巧、Future与FutureTask的源码、接口阻塞问题及解决方案,并通过具体业务场景和Java代码示例进行实战演示。
19 3
|
20天前
|
存储
让星星⭐月亮告诉你,HashMap的put方法源码解析及其中两种会触发扩容的场景(足够详尽,有问题欢迎指正~)
`HashMap`的`put`方法通过调用`putVal`实现,主要涉及两个场景下的扩容操作:1. 初始化时,链表数组的初始容量设为16,阈值设为12;2. 当存储的元素个数超过阈值时,链表数组的容量和阈值均翻倍。`putVal`方法处理键值对的插入,包括链表和红黑树的转换,确保高效的数据存取。
47 5
|
22天前
|
Java Spring
Spring底层架构源码解析(三)
Spring底层架构源码解析(三)
|
22天前
|
XML Java 数据格式
Spring底层架构源码解析(二)
Spring底层架构源码解析(二)
|
22天前
|
算法 Java 程序员
Map - TreeSet & TreeMap 源码解析
Map - TreeSet & TreeMap 源码解析
29 0
|
22天前
|
存储 Java C++
Collection-PriorityQueue源码解析
Collection-PriorityQueue源码解析
53 0
|
22天前
|
安全 Java 程序员
Collection-Stack&Queue源码解析
Collection-Stack&Queue源码解析
64 0
|
2月前
|
设计模式 Java 关系型数据库
【Java笔记+踩坑汇总】Java基础+JavaWeb+SSM+SpringBoot+SpringCloud+瑞吉外卖/谷粒商城/学成在线+设计模式+面试题汇总+性能调优/架构设计+源码解析
本文是“Java学习路线”专栏的导航文章,目标是为Java初学者和初中高级工程师提供一套完整的Java学习路线。
347 37

推荐镜像

更多