rustingcrab

csv-schema-validator

Version 0.1.2

Crates.io Documentation

A Rust library for validating CSV record data based on rules defined directly in your structs using the #[derive(ValidateCsv)] macro.

Installation

Add the following to your Cargo.toml:

[dependencies]
csv-schema-validator = "0.1.2"
serde = { version = "1.0", features = ["derive"] }
csv = "1.3"
regex = "1.11"
once_cell = "1.21"

Quick Start

use serde::Deserialize;
use csv::Reader;
use csv_schema_validator::{ValidateCsv, ValidationError};

// Define your struct with validation annotations.
// Each `#[validate(...)]` attribute declares the rules that `validate_csv()` checks
// for the field it is attached to.
#[derive(Deserialize, ValidateCsv, Debug)]
struct TestRecord {
    // Must lie between 0.0 and 100.0 (bounds inclusive).
    #[validate(range(min = 0.0, max = 100.0))]
    grade: f64,

    // Must match the pattern: three letters A-Z followed by four digits.
    #[validate(regex = r"^[A-Z]{3}\d{4}$")]
    code: String,

    // Must be present (not `None`), have a length within 10 to 50,
    // and must not be blank or whitespace-only.
    #[validate(required, length(min = 10, max = 50), not_blank)]
    name: Option<String>,

    // Checked by the custom function named in the attribute.
    #[validate(custom = "length_validation")]
    comments: String,

    // Must be present and be one of the listed allowed values.
    #[validate(required, one_of("short", "medium", "long"))]
    more_comments: Option<String>,

    // Must be present and must not be any of the listed values.
    #[validate(required, not_in("forbidden", "banned"))]
    tag: Option<String>,
}

// Custom validator referenced by `#[validate(custom = "length_validation")]`.
// Accepts comments of at most 50 bytes (`str::len` counts UTF-8 bytes, not characters);
// anything longer is rejected with an error message.
fn length_validation(s: &str) -> Result<(), String> {
    if s.len() <= 50 {
        Ok(())
    } else {
        Err("Comments too long".into())
    }
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut reader = Reader::from_path("data.csv")?;
    for result in reader.deserialize() {
        let rec: Record = result?;
        rec.validate_csv()?;
        println!("Record valid: {:?}", rec);
    }
    Ok(())
}

Usage

Range Validation (since 0.1.0)

#[validate(range(min = 0.0, max = 100.0))]
grade: f64,

Ensures that grade is between 0.0 and 100.0 (inclusive).

Regex Validation (since 0.1.0)

#[validate(regex = r"^[A-Z]{3}\d{4}$")]
code: String,

Validates the field against a regular expression.

Required Validation (since 0.1.0)

#[validate(required)]
name: Option<String>,

Ensures that the Option is not None.

Custom Validation (since 0.1.0)

#[validate(custom = "path::to::func")]
comments: String,

Calls your custom function fn(&T) -> Result<(), String> for additional checks.

Length (since 0.1.1)

#[validate(required, length(min = 10, max = 50))]
name: Option<String>,

Not Blank (since 0.1.2)

Rejects string fields that are blank or contain only whitespace:

#[validate(required, length(min = 10, max = 50), not_blank)]
name: Option<String>,

One of (since 0.1.2)

Checks that the string is one of the allowed values:

#[validate(required, one_of("short", "medium", "long"))]
more_comments: Option<String>,

Not in (since 0.1.2)

Checks that the string is not one of the disallowed values:

#[validate(required, not_in("forbidden", "banned"))]
tag: Option<String>,

Struct check

The macro validates the type it is annotating: only structs with named fields are allowed.

use serde::Deserialize;
use csv_schema_validator::ValidateCsv;

#[derive(Deserialize, ValidateCsv)]
struct TupleStruct(f64, String);

#[derive(Deserialize, ValidateCsv)]
enum Status {
    Success { code: f64, message: String },
    Error(f64, String),
    Unknown,
}

fn main() {
    // This program is intentionally invalid: the derive rejects both the tuple
    // struct and the enum at compile time, so the calls below are never built.
    let record = TupleStruct(42.0, "ABC1234".to_string());
    let s = Status::Success { code: 200.0, message: "OK".into() };
    let _ = record.validate_csv();
    let _ = s.validate_csv();
}

Trying to compile this code will result in errors:

cargo run
error: only structs with named fields (e.g., `struct S { a: T }`) are supported
 --> src/main.rs:5:19
  |
5 | struct TupleStruct(f64, String);
  |                   ^^^^^^^^^^^^^

error: only structs are supported
  --> src/main.rs:8:1
   |
8  | / enum Status {
9  | |     Success { code: f64, message: String },
10 | |     Error(f64, String),
11 | |     Unknown,
12 | | }
   | |_^

Complete example

This is an example which reads a csv file:

Cargo.toml:

[package]
name = "use-csv-validator"
version = "0.1.1"
edition = "2021"

[dependencies]
csv = "1.1"
serde = { version = "1.0", features = ["derive"] }
csv-schema-validator = "0.1.2"

src/main.rs:

use std::error::Error;
use csv::ReaderBuilder;
use serde::Deserialize;
use csv_schema_validator::{ValidateCsv, ValidationError};

/// Custom validator for the `comments` field.
///
/// Returns `Ok(())` when `s` is at most 20 bytes long, otherwise an error message.
fn length_validation(s: &str) -> Result<(), String> {
    // Guard clause: reject over-long input first.
    if s.len() > 20 {
        return Err("Comments too long".into());
    }
    Ok(())
}

// One CSV row; each `#[validate(...)]` attribute declares the rules that
// `validate_csv()` checks for the field it is attached to.
#[derive(Deserialize, ValidateCsv, Debug)]
struct TestRecord {
    // Must lie between 0.0 and 100.0 (bounds inclusive).
    #[validate(range(min = 0.0, max = 100.0))]
    grade: f64,

    // Must match the pattern: three letters A-Z followed by four digits.
    #[validate(regex = r"^[A-Z]{3}\d{4}$")]
    code: String,

    // Must be present (not `None`), have a length within 10 to 50,
    // and must not be blank or whitespace-only.
    #[validate(required, length(min = 10, max = 50), not_blank)]
    name: Option<String>,

    // Checked by the `length_validation` function defined above.
    #[validate(custom = "length_validation")]
    comments: String,

    // Deserialized from the CSV column named `more`; must be present and
    // be one of the listed allowed values.
    #[serde(rename = "more")]
    #[validate(required, one_of("short", "medium", "long"))]
    more_comments: Option<String>,

    // Must be present and must not be any of the listed values.
    #[validate(required, not_in("forbidden", "banned"))]
    tag: Option<String>,
}

/// Reads `data.csv`, validates every row and reports the outcome per line.
///
/// Valid records are printed to stdout; validation errors are listed on stderr
/// and processing continues. A row that cannot be deserialized aborts the run.
fn main() -> Result<(), Box<dyn Error>> {
    // `data.csv` sits alongside Cargo.toml and starts with a header row.
    let mut builder = ReaderBuilder::new();
    builder.has_headers(true);
    let mut reader = builder.from_path("data.csv")?;

    for (index, row) in reader.deserialize::<TestRecord>().enumerate() {
        // Report 1-based line numbers.
        let line = index + 1;
        let record = row?;

        if let Err(errors) = record.validate_csv() {
            eprintln!("Line {}: Validation errors:", line);
            for ValidationError { field, message } in errors {
                eprintln!("  Field `{}`: {}", field, message);
            }
            continue;
        }

        println!("Line {}: Record is valid: {:?}", line, record);
    }

    Ok(())
}

data.csv:

grade,code,name,comments,more,tag
85.5,XYZ1234,Alice Smith,All good,short,allowed
90.0,XYZ5678,Bob Marley,Too long comment indeed,medium,allowed
110.0,XYZ4567,      ,ok,short,allowed
95.0,xWF9101,Charlie,code,long,allowed
110.0,XYZ2345,Dave Copperfield,range,short,allowed
34.0,XYZ6789,,name,medium,allowed
78.0,XYZ7890,Frank,more,invalid comment,allowed
88.0,XYZ4567,Grace,All good,short,
90.0,XYZ3567,Grace of All Times,All good,medium,forbidden
f34s,XYZ3456,Eve,comments,short,invalid grade

Running this example will generate these messages:

Line 1: Record is valid: TestRecord { grade: 85.5, code: "XYZ1234", name: Some("Alice Smith"), comments: "All good", more_comments: Some("short"), tag: Some("allowed") }
Line 2: Validation errors:
  Field `comments`: Comments too long
Line 3: Validation errors:
  Field `grade`: value out of expected range: 0 to 100
  Field `name`: length out of expected range: 10 to 50
  Field `name`: must not be blank or contain only whitespace
Line 4: Validation errors:
  Field `code`: does not match the expected pattern
  Field `name`: length out of expected range: 10 to 50
Line 5: Validation errors:
  Field `grade`: value out of expected range: 0 to 100
Line 6: Validation errors:
  Field `name`: mandatory field
Line 7: Validation errors:
  Field `name`: length out of expected range: 10 to 50
  Field `more_comments`: invalid value
Line 8: Validation errors:
  Field `name`: length out of expected range: 10 to 50
  Field `tag`: mandatory field
Line 9: Validation errors:
  Field `tag`: value not allowed
Error: Error(Deserialize { pos: Some(Position { byte: 448, line: 11, record: 10 }), err: DeserializeError { field: Some(0), kind: ParseFloat(ParseFloatError { kind: Invalid }) } })

Why Use This Crate?

Comparison with csv Crate Validations

While the csv crate provides low‑level parsing and some helper methods, this derive‑based approach offers:

By contrast, using the csv crate directly may require manual loops over records and explicit match/if chains for each validation, leading to more boilerplate and potential for missing checks.

Compatibility

Contributing

Feel free to open issues and submit pull requests. See CONTRIBUTING.md for details.

License

This project is licensed under the MIT License. See the LICENSE file for details.