diff --git a/autorag/utils/preprocess.py b/autorag/utils/preprocess.py
index d4fa3b838..1dd7f2f26 100644
--- a/autorag/utils/preprocess.py
+++ b/autorag/utils/preprocess.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pandas as pd
 
+from autorag.utils.util import normalize_unicode
+
 
 def validate_qa_dataset(df: pd.DataFrame):
     columns = ['qid', 'query', 'retrieval_gt', 'generation_gt']
@@ -49,6 +51,8 @@ def cast_generation_gt(gt):
         "query must be string type."
     df['retrieval_gt'] = df['retrieval_gt'].apply(cast_retrieval_gt)
     df['generation_gt'] = df['generation_gt'].apply(cast_generation_gt)
+    df['query'] = df['query'].apply(normalize_unicode)
+    df['generation_gt'] = df['generation_gt'].apply(lambda x: list(map(normalize_unicode, x)))
     return df
 
 
@@ -80,6 +84,19 @@ def make_prev_next_id_metadata(x, id_type: str):
     df['metadata'] = df['metadata'].apply(lambda x: make_prev_next_id_metadata(x, 'prev_id'))
     df['metadata'] = df['metadata'].apply(lambda x: make_prev_next_id_metadata(x, 'next_id'))
 
+    df['contents'] = df['contents'].apply(normalize_unicode)
+
+    def normalize_unicode_metadata(metadata: dict):
+        """Return a new dict in which every top-level str value went through normalize_unicode."""
+        # Keys and non-str values (nested containers included) pass through untouched.
+        # NOTE(review): any str prev_id/next_id values are normalized too; confirm doc_id matches.
+        return {
+            key: normalize_unicode(value) if isinstance(value, str) else value
+            for key, value in metadata.items()
+        }
+
+    df['metadata'] = df['metadata'].apply(normalize_unicode_metadata)
+
     # check every metadata have a prev_id, next_id key
     assert all('prev_id' in metadata for metadata in df['metadata']), "Every metadata must have a prev_id key."
     assert all('next_id' in metadata for metadata in df['metadata']), "Every metadata must have a next_id key."
diff --git a/autorag/utils/util.py b/autorag/utils/util.py
index 6f067d804..0a4077fbf 100644
--- a/autorag/utils/util.py
+++ b/autorag/utils/util.py
@@ -7,6 +7,7 @@
 import os
 import re
 import string
+import unicodedata
 from copy import deepcopy
 from typing import List, Callable, Dict, Optional, Any, Collection
 
@@ -360,3 +361,7 @@ def filter_dict_keys(dict_, keys: List[str]):
 def split_dataframe(df, chunk_size):
     num_chunks = len(df) // chunk_size + 1 if len(df) % chunk_size != 0 else len(df) // chunk_size
     return list(map(lambda x: df[x * chunk_size:(x + 1) * chunk_size], range(num_chunks)))
+
+
+def normalize_unicode(text: str) -> str:  # Return `text` converted to Unicode normalization form NFC.
+    return unicodedata.normalize('NFC', text)  # NFC composes decomposed sequences, so equivalent spellings compare equal.
diff --git a/tests/autorag/utils/test_util.py b/tests/autorag/utils/test_util.py
index 7429a5429..f977145e9 100644
--- a/tests/autorag/utils/test_util.py
+++ b/tests/autorag/utils/test_util.py
@@ -13,7 +13,7 @@
 from autorag.utils import fetch_contents
 from autorag.utils.util import load_summary_file, result_to_dataframe, \
     make_combinations, explode, replace_value_in_dict, normalize_string, convert_string_to_tuple_in_dict, process_batch, \
-    convert_env_in_dict, openai_truncate_by_token, convert_datetime_string, split_dataframe
+    convert_env_in_dict, openai_truncate_by_token, convert_datetime_string, split_dataframe, normalize_unicode
 from tests.mock import MockLLM
 
 root_dir = pathlib.PurePath(os.path.dirname(os.path.realpath(__file__))).parent.parent
@@ -331,3 +331,18 @@ def test_split_dataframe():
     assert len(df_list_2[0]) == 3
     assert len(df_list_2[-1]) == 1
     assert pd.DataFrame({'a': list(range(3)), 'b': list(range(10, 13))}).equals(df_list_2[0])
+
+
+def test_normalize_unicode():
+    """Composed and decomposed spellings of one Hangul text must normalize to the same string."""
+    import unicodedata  # local import: only needed to build the decomposed input
+    str1 = "전국보행자전용도로표준데이터"  # precomposed syllables, one code point each
+    str2 = unicodedata.normalize('NFD', str1)  # built in code: a look-alike literal breaks if the file is re-normalized
+    assert len(str1) == 14
+    assert len(str2) == 34
+    assert str1 != str2
+
+    new_str1 = normalize_unicode(str1)
+    new_str2 = normalize_unicode(str2)
+    assert len(new_str1) == len(new_str2) == 14
+    assert new_str1 == new_str2