Codificação de árvore binária

12

Vamos supor que você tenha uma árvore binária completa (ou seja, cada nó interno possui exatamente dois descendentes não vazios). Cada nó contém um número inteiro diferente de zero. Você tem a tarefa de codificar e decodificar a árvore em / de uma lista de números inteiros.

A árvore é armazenada internamente, algo como:

struct node {
  int data;
  struct node *left, *right;
};

E você tem que implementar duas funções:

int *encode(struct node *root);
struct node *decode(int *array);

Depende de você como você codifica e decodifica.

Pontos por:

comprimento mínimo de codificação
complexidade (idealmente linear em número de nós)
originalidade

Não há pontos para o comprimento do código-fonte e você não está restrito a C.

Exemplo de árvore:

code-challenge tree-traversal

— Alexandru
fonte

1

Os requisitos de entrada e saída não prejudicariam.

— Yasir Arsanukaev

2

@Yasir: O algoritmo de codificação é o seu trabalho, então não posso fornecer nenhuma entrada e saída. int *é uma caixa preta para o usuário.

— Alexandru

Existem restrições no intervalo de números inteiros? Mais especificamente, se usarmos uma linguagem com números inteiros arbitrariamente grandes, podemos usar isso? E o tamanho dos dados codificados é medido em número de números inteiros ou número de bytes?

— sepp2k

As funções de codificação e decodificação precisam estar livres de efeitos colaterais (além da alocação de memória)? Ou eles podem, por exemplo, armazenar dados em variáveis globais?

— sepp2k

1

Supondo que os números inteiros de dados sejam reais de 32 bits, existe uma codificação simples que usa apenas 32 * n bits.

— Anon.

2

~ 1,03 N

Parece que todas as respostas até agora levam pelo menos 2 * N * 32 bits para serem armazenadas. (Exceto as soluções em idiomas que permitem valores inteiros maiores que 32 bits, como as soluções Haskell e Ruby - mas elas ainda precisam de bytes extras para codificar sempre que os dados forem maiores que 16K.)

Aqui está uma solução que leva apenas N + teto (N / 32) +1 ints de armazenamento. Isso se aproxima de 1,03125 N para N grande e está abaixo de 1,1 N para todos os N maiores que 20.

A idéia é armazenar um bit extra para cada nó em que 1 é "hasChildren". Esses bits são compactados em N / 32 palavras na frente.

int* encodeHelper(Node* n, int* code, int* pos, int* flag)
{
   int hasKids = (n->left!=0);
   code[*flag/32]|=hasKids<<(*flag&31);
   *flag+=1;
   if (hasKids) bencodeHelper(n->left, code, pos, flag);
   code[*pos]=n->data;
   *pos+=1;
   if (hasKids) bencodeHelper(n->right, code, pos, flag);
   return code;
}

int* encode(Node* h, int* sizeOut)
{
   int nnodes=countNodes(h);
   int nflags = (int)ceil(nnodes/32.0);
   int pos=nflags+1;
   int flag=32;
   int* out;
   *sizeOut = 1+nnodes+nflags;
   out = calloc(*sizeOut, sizeof(int));
   if (!h) return out;
   out[0]=nflags+1; //store start of data
   return encodeHelper(h,out,&pos,&flag);
}

Node* decodeHelper(int* code, int* pos, int* flag)
{
   Node*n = calloc(1, sizeof(Node));
   int hasKids = code[*flag/32]>>(*flag&31)&1;
   *flag+=1;
   if (hasKids) n->left = bdecodeHelper(code, pos, flag);
   n->data = code[*pos];
   *pos+=1;
   if (hasKids) n->right = bdecodeHelper(code, pos, flag);
   return n;
}

Node* decode(int* code)
{
   int flag=32;
   int pos=code[0];
   if (!pos) return NULL;
   return decodeHelper(code, &pos, &flag);
}

(compile a implementação aqui)

— AShelly
fonte

5

Este programa Haskell codifica uma árvore de n nós em n Inteiros. O truque é que ele codifique os dados do nó duplicado e, em seguida, use o bit de ordem inferior para indicar se este é um nó folha ou um nó interior.

Tecnicamente, a Parsermônada aqui é over-kill, já que existe apenas um analisador criado, decodere eu poderia ter colocado a lógica de encadeamento do analisador diretamente lá. Mas, dessa maneira, o decodificador é muito claro e, Parserapesar do tamanho pequeno, é uma estrutura de análise simples e razoável.

import Control.Monad (ap)

data Tree = Leaf Integer | Node Integer Tree Tree
  deriving (Eq, Show)

encode :: Tree -> [Integer]
encode (Leaf n)     = [n*2]
encode (Node n t u) = (n*2+1) : encode t ++ encode u

decode :: [Integer] -> Maybe Tree
decode = fullyParse decoder
  where
    decoder :: Parser Integer Tree
    decoder = do
      i <- next
      let n = i `div` 2
      if even i
        then return (Leaf n)
        else return (Node n) `ap` decoder `ap` decoder

-- A simple Parsing Monad
data Parser a b = P { runParser :: [a] -> Maybe (b, [a]) }

instance Monad (Parser a) where
  return a = P ( \ts -> Just (a, ts) )
  p >>= q  = P ( \ts -> runParser p ts >>= (\(v,ts') -> runParser (q v) ts') )
  fail _   = P ( const Nothing )

next :: Parser a a
next = P n
 where n (t:ts) = Just (t,ts)
       n _      = Nothing

fullyParse :: Parser a b -> [a] -> Maybe b
fullyParse p ts = runParser p ts >>= consumedResult
  where
    consumedResult (v,[]) = Just v
    consumedResult _      = Nothing

-- Example
main :: IO ()
main = do
    putStrLn $ "example:  " ++ show ex
    putStrLn $ "encoding: " ++ show encEx
    putStrLn $ "decoding: " ++ show decEx
    putStrLn $ "worked?   " ++ show worked
  where
    ex = Node 5
          (Leaf 3)
          (Node 2
            (Node 2
              (Leaf 9)
              (Leaf 9)
            )
            (Leaf 1)
          )
    encEx = encode ex
    decEx = decode encEx
    worked = maybe False (ex ==) decEx

Ao executar isso, você obtém:

> runhaskell TreeEncoding.hs 
example:  Node 5 (Leaf 3) (Node 2 (Node 2 (Leaf 9) (Leaf 9)) (Leaf 1))
encoding: [11,6,5,5,18,18,2]
decoding: Just (Node 5 (Leaf 3) (Node 2 (Node 2 (Leaf 9) (Leaf 9)) (Leaf 1)))
worked?   True

— MtnViewMark
fonte

4

Em C

#include <stdlib.h>
#include <stdio.h>

struct Node;
typedef struct Node Node;

struct Node
{
    int   data;
    Node* left;
    Node* right;
};
/* Private Functions */
static int*  encodeNode(Node* tree, int* store);
static Node* decodeNode(int** store);

/* Public Functions */
Node*   newNode(int data,Node* left,Node* right);
void    deleteTree(Node* tree);
int     countNodesTree(Node* tree);
int*    encode(Node *tree);
Node*   decode(int* store);
void    printTree(Node* tree);

Node* newNode(int data,Node* left,Node* right)
{
    Node* result    = (Node*)malloc(sizeof(Node));
    result->data    = data;
    result->left    = left;
    result->right   = right;

    return result;
}

void deleteTree(Node* tree)
{
    if (tree == NULL)
    {   return;
    }

    deleteTree(tree->left);
    deleteTree(tree->right);
    free(tree);
}

int countNodesTree(Node* tree)
{
    if (tree == NULL)
    {   return 0;
    }

    return    countNodesTree(tree->left)
            + countNodesTree(tree->right)
            + 1;
}

void printTree(Node* tree)
{
    if (tree == NULL)
    {
        fprintf(stdout, "- ");
    }
    else
    {
        fprintf(stdout, "%d ", tree->data);
        printTree(tree->left);
        printTree(tree->right);
    }
};

A codificação:

int* encode(Node *tree)
{
    int     nodeCount   = countNodesTree(tree);
    int*    result      = (int*)malloc(sizeof(int) * (nodeCount * 2 + 1));

    // Put the node count in the first element.
    // This makes it easy to also serialize this object for transport
    // i.e. you can put it in a file or a stream (socket) and easily recover it.
    result[0]           = nodeCount;
    encodeNode(tree, result + 1);
    return result;
}

int* encodeNode(Node* tree, int* store)
{
    if (tree != NULL)
    {
        store[0]    = tree->data;
        /*
         * Slight overkill. for this question.
         * But works and makes future enhancement easy
         */
        store[1]    = (tree->left  == NULL ? 0 : 1)
                    + (tree->right == NULL ? 0 : 2);
        store += 2;

        store       = encodeNode(tree->left,  store);
        store       = encodeNode(tree->right, store);
    }
    return store;
}

A decodificação:

Node* decode(int* store)
{
    if (store == NULL)
    { fprintf(stderr, "Bad Input terminating: encode() always return non NULL\n");
      exit(1);
    }

    if (store[0] == 0)
    {
        return NULL;
    }

    store++;
    return decodeNode(&store);
}

Node* decodeNode(int** store)
{
    int     value   = (*store)[0];
    int     flag    = (*store)[1];
    (*store) += 2;

    Node*   left    = flag & 1 ? decodeNode(store) : NULL;
    Node*   right   = flag & 2 ? decodeNode(store) : NULL;

    return newNode(value, left, right);
}

A Principal:

int main()
{
    Node*   t = newNode(5,
                        newNode(3, NULL, NULL),
                        newNode(2,
                                newNode(2,
                                        newNode(9, NULL, NULL),
                                        newNode(9, NULL, NULL)
                                       ),
                                newNode(1, NULL, NULL)
                               )
                       );

    printTree(t);
    fprintf(stdout,"\n");

    int*    e   = encode(t);
    Node*   d   = decode(e);
    printTree(d);
    fprintf(stdout,"\n");

    free(e);
    deleteTree(d);
    deleteTree(t);
}

Nota. Cada nó é codificado como dois números inteiros (mais um int para a contagem de nós).
Portanto, a árvore fornecida codifica assim:

 7, 5, 3, 3, 0, 2, 3, 2, 3, 9, 0, 9, 0 1, 0
 ^  ^
 ^  ^ Node 1
 ^
 Count

— Martin York
fonte

3

Em Ruby, com a mesma codificação que @MtnViewMark :

class Node
        def initialize(data, left = nil, right = nil)
                @data, @left, @right = data, left, right
        end

        def encode
                "%d %s %s" % [@data<<1|1, @left.encode, @right.encode]
        end

        class << self
                def decode(str)
                        _decode(str.split.map &:to_i)
                end

                private

                def _decode(a)
                        n = a.shift
                        if n & 1 == 1
                                Node.new(n>>1, _decode(a), _decode(a))
                        else
                                Leaf.new(n>>1)
                        end
                end
        end
end

class Leaf < Node
        def encode
                (@data<<1).to_s
        end
end

tree=Node.decode("11 6 5 5 18 18 2")
print tree.encode

O custo é um número inteiro por nó ( data << 1 | has_childs):

11 6 5 5 18 18 2

— Arnaud Le Blanc
fonte

Uau - isso parece magro e elegante. No entanto, não é necessário um array de int, não é?

— desconhecido usuário

2

Dada uma árvore binária com nnós, isso a codifica em uma lista de 2n + 1números inteiros. Os algoritmos de codificação e decodificação têm O(n)complexidade.

Eu uso o número 0 como marcador de sentinela ao codificar, indicando quando desdobro a recursão. Então, quando estou decodificando, coloco os nós da árvore que estou criando em uma pilha (das sortes) e uso os 0s na lista para acompanhar onde adicionar o próximo nó. Não tentei, mas tenho certeza de que a decodificação seria interrompida se a árvore não estivesse completa.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

// Prototypes
struct BTnode;
struct BTnode * bt_add_left(struct BTnode * node, int data);
struct BTnode * bt_add_right(struct BTnode * node, int data);
int             bt_depth(struct BTnode * tree);
int             bt_encode_preorder(int * list, struct BTnode * tree, int index);
struct BTnode * bt_node_create(int data);
int             bt_node_delete(struct BTnode * node);
void            bt_print_preorder(struct BTnode * tree);
int *           encode(struct BTnode * tree);
struct BTnode * decode(int * list);

// Binary tree node
struct BTnode
{
  int data;
  struct BTnode *left, *right;
};

// Add node to this node's left
struct BTnode * bt_add_left(struct BTnode * node, int data)
{
  struct BTnode * newnode = bt_node_create(data);
  node->left = newnode;
  return newnode;
}

// Add node to this node's right
struct BTnode * bt_add_right(struct BTnode * node, int data)
{
  struct BTnode * newnode = bt_node_create(data);
  node->right = newnode;
  return newnode;
}

// Determine depth of the tree
int bt_depth(struct BTnode * tree)
{
  int depth;
  int leftdepth = 0;
  int  rightdepth = 0;
  if( tree == NULL ) return 0;

  if( tree->left != NULL )
    leftdepth = bt_depth(tree->left);
  if( tree->right != NULL )
    rightdepth = bt_depth(tree->right);

  depth = leftdepth;
  if(rightdepth > leftdepth)
    depth = rightdepth;

  return depth + 1;
}

// Recursively add node values to integer list, using 0 as an unfolding sentinel
int bt_encode_preorder(int * list, struct BTnode * tree, int index)
{
  list[ index++ ] = tree->data;

  // This assumes the tree is complete (i.e., if the current node does not have
  // a left child, then it does not have a right child either)
  if( tree->left != NULL )
  {
    index = bt_encode_preorder(list, tree->left, index);
    index = bt_encode_preorder(list, tree->right, index);
  }

  // Add sentinel
  list[ index++ ] = 0;
  return index;
}

// Allocate memory for a node
struct BTnode * bt_node_create(int data)
{
  struct BTnode * newnode = (struct BTnode *) malloc(sizeof(struct BTnode));
  newnode->left = NULL;
  newnode->right = NULL;
  newnode->data = data;
  return newnode;
}

// Free node memory
int bt_node_delete(struct BTnode * node)
{
  int data;
  if(node == NULL)
    return 0;
  data = node->data;

  if(node->left != NULL)
    bt_node_delete(node->left);
  if(node->right != NULL)
    bt_node_delete(node->right);

  free(node);
  return data;
}

// Print all values from the tree in pre-order
void bt_print_preorder(struct BTnode * tree)
{
  printf("%d ", tree->data);
  if(tree->left != NULL)
    bt_print_preorder(tree->left);
  if(tree->right != NULL)
    bt_print_preorder(tree->right);
}

// Decode binary tree structure from a list of integers
struct BTnode * decode(int * list)
{
  struct BTnode * tree;
  struct BTnode * nodestack[ list[0] ];
  int i,j;

  // Handle trivial case
  if( list == NULL ) return NULL;

  tree = bt_node_create( list[1] );
  nodestack[ 1 ] = tree;

  j = 1;
  for(i = 2; i < list[0]; i++)
  {
    if( list[i] == 0 )
    {
      //printf("popping\n");
      j--;
    }
    else
    {
      if( nodestack[j]->left == NULL )
      {
        //printf("Adding %d to left of %d\n", list[i], nodestack[j]->data);
        nodestack[ j+1 ] = bt_add_left(nodestack[j], list[i]);
        j++;
      }
      else
      {
        //printf("Adding %d to right of %d\n", list[i], nodestack[j]->data);
        nodestack[ j+1 ] = bt_add_right(nodestack[j], list[i]);
        j++;
      }
    }
  }

  return tree;
}

// Encode binary tree structure as a list of integers
int * encode(struct BTnode * tree)
{
  int maxnodes, depth, length;
  int * list;
  int j;

  // Handle trivial case
  if(tree == NULL) return NULL;

  // Calculate maximum number of nodes in the tree from the tree depth
  maxnodes = 1;
  depth = bt_depth(tree);
  for(j = 0; j < depth; j++)
  {
    maxnodes += pow(2, j);
  }

  // Allocate memory for the list; we need two ints for each value plus the
  // first value in the list to indicate length
  list = (int *) malloc( ((maxnodes * 2)+1) * sizeof(int));
  length = bt_encode_preorder(list, tree, 1);
  list[ 0 ] = length;
  return list;
}

int main()
{
  struct BTnode * tree;
  struct BTnode * newtree;
  int * list;
  int i;

  /* Provided example

        5
       / \
      3   2
         / \
        2   1
       / \
      9   9
  */
  tree = bt_node_create(5);
  bt_add_left(tree, 3);
  struct BTnode * temp = bt_add_right(tree, 2);
  bt_add_right(temp, 1);
  temp = bt_add_left(temp, 2);
  bt_add_left(temp, 9);
  bt_add_right(temp, 9);
  printf("T (traversed in pre-order):  ");
  bt_print_preorder(tree);
  printf("\n");

  list = encode(tree);
  printf("T (encoded as integer list): ");
  for(i = 1; i < list[0]; i++)
    printf("%d ", list[i]);
  printf("\n");

  newtree = decode(list);
  printf("T' (decoded from int list):  ");
  bt_print_preorder(newtree);
  printf("\n\n");


  // Free memory
  bt_node_delete(tree);
  bt_node_delete(newtree);
  free(list);
  return 0;
}

Salve isso como encode.ccompilado e executado. Este programa usa a árvore de exemplo que você forneceu e eu a testei em algumas outras com êxito.

$ gcc -Wall -lm -o encode encode.c
$ ./encode 
T (traversed in pre-order):  5 3 2 2 9 9 1 
T (encoded as integer list): 5 3 0 2 2 9 0 9 0 0 1 0 0 0 
T' (decoded from int list):  5 3 2 2 9 9 1

— Daniel Standage
fonte

É praticamente o que eu tinha em mente :).

— Alexandru

isso não falhará na decodificação se os dados contiverem um 0?

— precisa saber é o seguinte

@AShelly Ele disse explicitamente que 0 não seria incluído na árvore. Se fosse, sim, isso falharia.

— Daniel Standage

2

Meu código codifica a árvore em uma travessia de pré-encomenda, cada folha em duas entradas (seus dados seguidos por 0) e cada nó interno em um int (seus dados seguidos pelo filho esquerdo e depois pelo direito). Para uma árvore binária completa (como você a define) com n nós, n deve ser ímpar e existem (n + 1) / 2 folhas e (n-1) / 2 nós internos, então isso é 3n / 2 + 1 / 2 inteiros para a codificação.

aviso: não testado, apenas digitado.

struct node {
  int data;
  struct node *left, *right;
};

void encodeInternal(struct node *root, vector<int> *buf) {
  buf->push_back(root->data);
  if (root->left) {
    encodeInternal(root->left, buf);
    encodeInternal(root->right, buf);
  } else {
    buf->push_back(0);
  }
}
int *encode(struct node *root) {
  vector<int> buf;
  encodeInternal(root, &buf);
  return &buf[0];
}

struct decodeResult {
  int encoded_size;
  struct node *n;
}
struct decodeResult decodeInternal(int *array) {
  struct node *n = (struct node*)malloc(sizeof(struct node));
  n->data = array[0];
  if (array[1] == 0) {
    n->left = n->right = NULL;
    return (decodeResult){2, n};
  } else {
    decodeResult L = decodeInternal(array + 1);
    decodeResult R = decodeInternal(array + 1 + L.encoded_size);
    n->left = L.n;
    n->right = R.n;
    return (decodeResult){1 + L.encoded_size + R.encoded_size, n};
  }
}
struct node *decode(int *array) {
  return decodeInternal(array).n;
}

— Keith Randall
fonte

1

Aqui está a minha tentativa. Ele armazena a árvore em uma matriz de tamanho 2 ** profundidade + 1. Ele usa a[0]para manter o tamanho e a[size]o índice do primeiro "nó vazio" encontrado em uma travessia profunda. (Um nó vazio é o local em que um filho seria armazenado se o pai tivesse um). Cada nó vazio contém o índice do próximo nó vazio que será encontrado.

Esse esquema evita a reserva de bits para marcar os filhos de presença, para que cada nó possa usar o intervalo inteiro completo. Também permite árvores desequilibradas - um pai pode ter apenas um filho.

resultado:

empty tree:  [0]
head node only:  [2,5,0]
example tree: [16,5,3,2,5,14,2,1,0,0, 0,0,9,9,15,0,4];

O codificador:

//utility
 int findDepth(Node* n) {
    int l = 0 ,r = 0;
    if (n) {
       l = 1 + findDepth(n->left);
       r = 1 + findDepth(n->right);
    }
    return ( l > r ) ? l : r;
 }

//Encode Function
 int* encodeTree(Node* head) {
    int* out;
    int depth = findDepth(head);
    int size = depth>0;
    while (depth--) size*=2;
    out = calloc(size+1,sizeof(int));
    out[0]=size;
    encodeNode(head, out,1, out+size);
    return out;
 }

 void encodeNode(Node* n, int* a, int idx, int* pEmpty) {
    if (n) {
       a[idx]=n->data;
       encodeNode(n->left,a,idx*2,pEmpty);
       encodeNode(n->right,a,idx*2+1,pEmpty);
    }
    else if (idx<a[0]) {
       *pEmpty = idx;
       pEmpty = a+idx;
    }
 }

O decodificador:

 //Decode Function
 Node* decodeArray(int* a) {
    return (a[0]) ?  decodeNode(a,1,a+a[0]) : NULL;
 }

 Node* decodeNode(int* a, int idx, int* pEmpty) {
    Node* n = NULL;
    if (idx== *pEmpty)
       *pEmpty=a[idx];
    else {
       n = calloc(1,sizeof(Node));
       n->data = a[idx];
       if (idx*2<a[0]) {
          n->left = decodeNode(a, idx*2, pEmpty);
          n->right = decodeNode(a, idx*2+1, pEmpty);
       }
    }
    return n;
 }

(obrigado @daniel sobral por corrigir a formatação)

— AShelly
fonte

1

Scala:

trait Node {
  def encode (): Array[Int]
}

case object Node {
  def decode (a: Array[Int]): InnerNode = {
    if (a.length == 1) InnerNode (a(0)) else {
      val r = InnerNode (a(1)) 
      val l = decode (a.tail.tail) 
      InnerNode (a(0), l, r) 
    }
  }
}

case object Leaf extends Node {
  def encode (): Array[Int] = Array.empty
}

case class InnerNode (val data: Int, var l: Node=Leaf, var r: Node=Leaf) extends Node {
  def encode (): Array[Int] = Array (data) ++ r.encode () ++ l.encode () 
}

object BinTreeTest extends App {
  println (Node.decode (Array (1, 2, 3, 4, 5)).encode.mkString (", "))
}

Essa é uma abordagem que usa sintaxe obsoleta, mas compila sem erros no Scala 2.9.1. Ele gera uma árvore e decodifica todas as árvores codificadas para a mesma matriz usada na codificação. Talvez eu me livre de alguma maneira dos avisos obsoletos hoje.

Uau - isso foi simples. A primeira ideia funcionou imediatamente.

— Usuário desconhecido
fonte