from reference_transformer.tokenizer import StupidTokenizer
-
@pytest.fixture
def test_data():
    """Load the Shakespeare text file used as input for the tests.

    Returns:
        str: The full contents of ``./data/shakespeare.txt``, decoded as UTF-8.
    """
    # The path is relative, so it resolves against the current working
    # directory of the test run.
    with open("./data/shakespeare.txt", "r", encoding="utf8") as f:
        return f.read()
tokenizer = StupidTokenizer(test_data)
encoded = tokenizer.encode(test_data)
decoded = tokenizer.decode(encoded)
- assert test_data == decoded, "EncodeDecode does not reproduce original data..."
-
-
+ assert test_data == decoded, "EncodeDecode does not reproduce original data."