Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

训练麻将的时候报错,看起来像是牌堆(dealer.deck)已经没有牌了 #322

Open
acracker opened this issue Sep 22, 2024 · 0 comments
Open

训练麻将的时候报错,看起来像是牌堆(dealer.deck)已经没有牌了 #322

acracker opened this issue Sep 22, 2024 · 0 comments

Comments

@acracker
Copy link

看起来像是 dealer.deck 中已经没有牌了,但是还会继续执行 step。我看了一下代码,也不知道如何退出。

err:

C:\Users\pl\MiniConda3\python.exe D:/code/mahjong/t2.py

Logs saved in experiments/leduc_holdem_cfr_result/
Traceback (most recent call last):
  File "D:/code/mahjong/t2.py", line 131, in <module>
    train(args)
  File "D:/code/mahjong/t2.py", line 65, in train
    agent.train()
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 41, in train
    self.traverse_tree(probs, player_id)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 73, in traverse_tree
    utility = self.traverse_tree(new_probs, player_id)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 73, in traverse_tree
    utility = self.traverse_tree(new_probs, player_id)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 73, in traverse_tree
    utility = self.traverse_tree(new_probs, player_id)
  [Previous line repeated 86 more times]
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\agents\cfr_agent.py", line 72, in traverse_tree
    self.env.step(action)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\envs\env.py", line 84, in step
    next_state, player_id = self.game.step(action)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\games\mahjong\game.py", line 68, in step
    self.round.proceed_round(self.players, action)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\games\mahjong\round.py", line 78, in proceed_round
    self.dealer.deal_cards(players[self.current_player], 1)
  File "C:\Users\pl\MiniConda3\lib\site-packages\rlcard\games\mahjong\dealer.py", line 26, in deal_cards
    player.hand.append(self.deck.pop())
IndexError: pop from empty list

code:

# -*- coding: utf-8 -*-
import os
import logging
import argparse

import rlcard
from rlcard.agents import (
    CFRAgent,
    RandomAgent,
)
from rlcard.utils import (
    set_seed,
    tournament,
    Logger,
    plot_curve,
)


def train(args):
    """Train a CFR agent on ``args.env`` and periodically evaluate it.

    The agent is trained for ``args.num_episodes`` iterations. Every
    ``args.evaluate_every`` iterations the model is saved and its average
    payoff against a random agent over ``args.num_eval_games`` games is
    logged. A learning curve is plotted once training finishes.

    Args:
        args: Parsed command-line namespace providing ``env``, ``seed``,
            ``num_episodes``, ``num_eval_games``, ``evaluate_every`` and
            ``log_dir``.
    """
    # Training environment. Step-back is enabled here and not for evaluation;
    # presumably the CFR tree traversal relies on it -- confirm against RLCard.
    # NOTE(review): the example this script derives from states that CFR only
    # supports Leduc Holdem; other games may fail during traversal -- confirm.
    env = rlcard.make(
        args.env,
        config={
            'seed': 0,
            'allow_step_back': True,
        }
    )
    # Evaluate on the same game the agent is trained on. Previously this was
    # hard-coded to 'leduc-holdem', so an agent trained on another game was
    # evaluated in an environment it was never built for.
    eval_env = rlcard.make(
        args.env,
        config={
            'seed': 0,
        }
    )

    # Seed numpy, torch, random
    set_seed(args.seed)

    # Initialize CFR Agent; the model is stored under <log_dir>/cfr_model/<env>
    agent = CFRAgent(
        env,
        os.path.join(
            args.log_dir,
            'cfr_model',
            args.env,
        ),
    )
    agent.load()  # If we have saved model, we first load the model

    # Evaluate CFR against random. The opponent's action count comes from the
    # environment it actually plays in.
    eval_env.set_agents([
        agent,
        RandomAgent(num_actions=eval_env.num_actions),
    ])

    # Start training
    with Logger(args.log_dir) as logger:
        for episode in range(args.num_episodes):
            agent.train()
            print('\rIteration {}'.format(episode), end='')
            # Evaluate the performance. Play with Random agents.
            if episode % args.evaluate_every == 0:
                agent.save()  # Save model
                logger.log_performance(
                    episode,
                    # tournament() returns one entry per player; index 0 is
                    # the CFR agent (first in set_agents above).
                    tournament(
                        eval_env,
                        args.num_eval_games
                    )[0]
                )

        # Get the paths
        csv_path, fig_path = logger.csv_path, logger.fig_path
    # Plot the learning curve
    plot_curve(csv_path, fig_path, 'cfr')


if __name__ == '__main__':
    # Script entry point: declare the command-line options as data, register
    # them in one pass, then hand the parsed namespace to train().
    supported_envs = [
        'blackjack',
        'leduc-holdem',
        'limit-holdem',
        'doudizhu',
        'mahjong',
        'no-limit-holdem',
        'uno',
        'gin-rummy',
        'bridge',
    ]

    # (flag, add_argument keyword arguments), in the order they are registered.
    option_specs = (
        ('--env', {'type': str, 'default': 'mahjong', 'choices': supported_envs}),
        ('--seed', {'type': int, 'default': 42}),
        ('--num_episodes', {'type': int, 'default': 5000}),
        ('--num_eval_games', {'type': int, 'default': 2000}),
        ('--evaluate_every', {'type': int, 'default': 100}),
        ('--log_dir', {'type': str, 'default': 'experiments/leduc_holdem_cfr_result/'}),
    )

    cli = argparse.ArgumentParser("CFR example in RLCard")
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    args = cli.parse_args()
    # Verbose logging is switched on after argument parsing and before training.
    logging.basicConfig(level=logging.DEBUG)
    train(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant