import os

DATA_DIR = "."  # set your base data directory


def split_file_by_lines(input_file, chunk_size_mb=10, max_rows=1000, output_dir=None):
    """Split a text file into numbered chunks of at most ``max_rows`` lines.

    Chunks are named ``<basename>.partN.json`` (N starting at 1) and written
    to *output_dir*, which is created if missing. Each chunk's path is printed
    as it is written. If *input_file* does not exist, nothing happens.

    Args:
        input_file: Path to the text file to split.
        chunk_size_mb: Unused; retained for backward compatibility. The
            original code computed a byte budget from it but split strictly
            by line count.
        max_rows: Maximum number of lines per chunk (previously hard-coded
            to 1000).
        output_dir: Directory for the chunk files. Defaults to
            ``os.path.join(DATA_DIR, "proveit")`` when None.
    """
    if output_dir is None:
        output_dir = os.path.join(DATA_DIR, "proveit")
    os.makedirs(output_dir, exist_ok=True)

    # Guard clause: silently skip missing files (preserves original behavior).
    if not os.path.isfile(input_file):
        return

    base_name = os.path.splitext(os.path.basename(input_file))[0]
    chunk_num = 1
    buffer = []

    def _flush():
        # Write the buffered lines as the next numbered chunk, then reset.
        nonlocal chunk_num, buffer
        chunk_file = os.path.join(output_dir, f"{base_name}.part{chunk_num}.json")
        print(chunk_file)
        with open(chunk_file, 'w', encoding='utf-8') as cf:
            cf.writelines(buffer)
        buffer = []
        chunk_num += 1

    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            buffer.append(line)
            if len(buffer) == max_rows:
                _flush()

    # Write any remaining lines as a final (possibly short) chunk.
    if buffer:
        _flush()


# Example usage: split every .json file found directly under DATA_DIR.
for entry in os.listdir(DATA_DIR):
    full_path = os.path.join(DATA_DIR, entry)
    # Require the ".json" extension with the dot; the previous check
    # endswith("json") also matched names like "dumpjson".
    if os.path.isfile(full_path) and full_path.endswith(".json"):
        print(full_path)
        split_file_by_lines(full_path, chunk_size_mb=100)
