4
votes

I'm trying to implement MiniMax algorithm based on Wikipedia pseudocode in Tic-Tac-Toe game written in C. However, I can't manage to obtain the best possible move. Here's my code:

#include <stdio.h>
#include <stdbool.h>
#include <string.h>

// compile and run: gcc minimax.c -std=c99 && ./a.out

int max(int x, int y) {
  return x > y ? x : y;
}

int min(int x, int y) {
  return x < y ? x : y;
}

int whoWon(char ch) {
  switch (ch) {
    case 'O':
      return -1;
      break;
    case 'X':
      return 1;
      break;
  }
}

void printArray(char array[]) {
  printf("# START\n"
         "%c | %c | %c\n"
         "--|---|--\n"
         "%c | %c | %c\n"
         "--|---|--\n"
         "%c | %c | %c\n"
         "# END\n\n", array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7], array[8]);
}

int anyWinners(char board[])
{
  int i;

  /* check every row */
  for(i = 0; i < 7; i += 3)
    if(board[i] != ' ' && board[i] == board[i+1] && board[i] == board[i+2])
      return whoWon(board[i]);

  /* check every column */
  for(i = 0; i < 3; i++)
    if(board[i] != ' ' && board[i] == board[i+3] && board[i] == board[i+6])
      return whoWon(board[i]);

  /* check diagonals */
  if(board[4] != ' ' && ((board[0] == board[4] && board[0] == board[8]) || (board[2] == board[4] && board[2] == board[6])))
    return whoWon(board[4]);

  return 0;
}

int fullBoard(char board[]) {
  for (int i = 0; i < 9; ++i) {
    if (board[i] == ' ')
      return 0;
  }
  return 1;
}

int minimax(char node[], int depth, bool maximizingPlayer, int * move) {
  int terminalNode = anyWinners(node);
  if (depth == 0 || terminalNode || fullBoard(node)) {
    printf("################## END OF SUBTREE ##################\n");
    return terminalNode;
  }

  int bestValue, val;
  if (maximizingPlayer) {
    bestValue = -2;
    for (int i = 0; i < 9; ++i) {
      if (node[i] == ' ') {
        char child[9];
        strcpy(child, node);
        child[i] = 'X';

        // debug
        printArray(child);

        val = minimax(child, depth - 1, false, move);

        // debug
        printf("X: ^^ i = %d ^^ depth = %d ^^ val = %d\n", i, depth, val);

        //bestValue = max(bestValue, val);
        if (val > bestValue) {
          bestValue = val;
          if (depth == 9) *move = i;
        }
      }
    }
    return bestValue;
  } else {
    bestValue = 2;
    for (int i = 0; i < 9; ++i) {
      if (node[i] == ' ') {
        char child[9];
        strcpy(child, node);
        child[i] = 'O';

        // debug
        printArray(child);

        val = minimax(child, depth - 1, true, move);

        // debug
        printf("O: ^^ i = %d ^^ depth = %d ^^ val = %d\n", i, depth, val);

        bestValue = min(bestValue, val);
      }
    }
    return bestValue;
  }
}

int main() {
  int move = -999; // initialize only for debug

  // X will always win no matter what, first best move for X is 8
  // char board[] = {'O', ' ', ' ',
  //                 ' ', ' ', ' ',
  //                 'X', 'X', ' '};

  // best move for X is 3
  char board[] = {'O', 'O', ' ',
                  ' ', 'X', 'X',
                  ' ', ' ', ' '};

  // Initial call for maximizing player
  int result = minimax(board, 9, true, &move);
  printf("minimax returned: %d\n", result);
  printf("chosen move: %d\n", move);

  return 0;
}

Code prints board for each move with state of all variables. There are also two failing tests commented out in main. Right now algorithm returns bad moves and I can't find the error.

1
this function: 'int whoWon(char ch) ' contains a path through the code where no return (value); statement is encountered. Suggest modifying the code, perhaps by adding a default case or revise the logic to avoid the bad execution path problem.user3629249

1 Answers

2
votes

I see two problems:

  1. The heuristic is wrong
  2. There is a problem with strcpy.

The heuristic is wrong

The Wikipedia pseudo-code says:

if depth = 0 or node is a terminal node
    return the heuristic value of node

Your implementation does this:

if depth = 0 or node is a terminal node
    return 1 if X wins, -1 if O wins, 0 if it is a draw

But that isn't a very good heuristic. With that heuristic, all the possible ways that X could win are equally weighted. So if X finds a way to win in 3 moves, that is weighted just the same as if X finds a way to win in 2 moves, and that is weighted just the same as if X finds a way to win in 1 move.

So, here is what happens in your test case:

  1. X tries position 2.
  2. O tries position 3.
  3. X tries position 6.
  4. This is a terminal node. X wins. So return positive 1.

Heuristic for this decision path = 1

Another possibility it hits is:

  1. X tries position 3.
  2. This is a terminal node. X wins. So return positive 1.

Heuristic for this decision path = 1

Since both of these solutions have the same heuristic, so they are both of equal value. You probably meant for this solution to be suboptimal, because it took too many moves to win. I suggest a heuristic based on the number of moves it took to get here, multiplied who the winner is. So if X wins in 1 moves, heuristic is 5000. If X wins in 2 moves, then heuristic is 2500. If O wins in 2 moves, heuristic is -2500. Something like that.

There is a problem with strcpy

This line:

strcpy(child, node);

should be:

memcpy(child, node, 9*sizeof(char));

Because "node" is not a null terminated string. When I run this on VS2013/Windows 8.1 my output is garbage because. You might be getting lucky on your platform.