Spring Batch - ItemReader

Overview of ItemReader

<br/>

1.ItemReader: Interface to provide data

2. There is only one method in this interface, read(), which reads one item and advances to the next. read() must return null once the input is exhausted; as long as it returns a non-null value, more data remains to be read.

The interface is defined as follows:

public interface ItemReader<T> { @Nullable T read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException;}

<br/>

There are up to 33 default implementations available, basically covering all data source read types.

https://pics7.baidu.com/feed/43a7d933c895d143d6da9beea5b5d2075baf071d.jpeg?token=07e7d5295ac76ce643ae048461edf13d&s=7E2834620F8B404B1AF4D5CA0000A0B1

<br/> <br/> <br/>

# Read data from a database

<br/> 1. In practice, we often need to read data from the database in pages. Spring Batch provides the JdbcPagingItemReader class for reading database data.

2. Create user tables in the database

The database data is as follows:

<br/><br/> 3. Use JdbcPagingItemReader to read data from a database

/**
 * Job configuration that pages Customer rows out of a relational database
 * with JdbcPagingItemReader and hands them to the "dbJdbcDemoWriter" bean.
 */
@Configuration
public class DBJdbcDemoJobConfiguration {
    @Autowired
    private JobBuilderFactory jobBuilderFactory;
 
    @Autowired
    private StepBuilderFactory stepBuilderFactory;
 
    // Writer bean is declared elsewhere in the project; injected by name.
    @Autowired
    @Qualifier("dbJdbcDemoWriter")
    private ItemWriter<? super Customer> dbJdbcDemoWriter;
 
    @Autowired
    private DataSource dataSource;
 
    /** Single-step job: read customers page by page and write them out. */
    @Bean
    public Job DBJdbcDemoJob(){
        return jobBuilderFactory.get("DBJdbcDemoJob")
                .start(dbJdbcDemoStep())
                .build();
 
    }
 
    /** Chunk-oriented step: items are read and written in chunks of 100. */
    @Bean
    public Step dbJdbcDemoStep() {
        return stepBuilderFactory.get("dbJdbcDemoStep")
                .<Customer,Customer>chunk(100)
                .reader(dbJdbcDemoReader())
                .writer(dbJdbcDemoWriter)
                .build();
    }
 
    /**
     * Paging JDBC reader for the Customer table. Step-scoped so a fresh
     * reader instance is created for each step execution.
     */
    @Bean
    @StepScope
    public JdbcPagingItemReader<Customer> dbJdbcDemoReader() {
        JdbcPagingItemReader<Customer> reader = new JdbcPagingItemReader<>();
 
        reader.setDataSource(this.dataSource);
        reader.setFetchSize(100); // JDBC fetch-size hint: rows fetched per driver round trip
        reader.setRowMapper((rs,rowNum)->{
            // Map each ResultSet row onto a Customer object.
            return Customer.builder().id(rs.getLong("id"))
                    .firstName(rs.getString("firstName"))
                    .lastName(rs.getString("lastName"))
                    .birthdate(rs.getString("birthdate"))
                    .build();
 
        });
 
        // Build the paging SQL (SELECT ... FROM ... ORDER BY ...) for MySQL.
        MySqlPagingQueryProvider queryProvider = new MySqlPagingQueryProvider();
        queryProvider.setSelectClause("id, firstName, lastName, birthdate");
        queryProvider.setFromClause("from Customer");
        // A deterministic sort key is required for paging to be stable.
        Map<String, Order> sortKeys = new HashMap<>(1);
        sortKeys.put("id", Order.ASCENDING);
        queryProvider.setSortKeys(sortKeys);
 
        reader.setQueryProvider(queryProvider);
 
        return reader;
 
    }
}

<br/> Output Method

```java
@Component("dbJdbcDemoWriter")
public class DbJdbcDemoWriter implements ItemWriter<Customer> {
    @Override
    public void write(List<? extends Customer> items) throws Exception {
        for (Customer customer : items) {
            System.out.println(customer);
        }
    }
}
```

<br/><br/><br/>
# Read data from CSV/txt files

<br/>
Place a CSV file in resources in your project, for example, reading customer.csv

File Content

![file](https://graph.baidu.com/resource/222620df58b12c167892e01583251119.png)


FlatFileItemReader

/**
 * Job configuration that reads Customer records from customer.csv on the
 * classpath with FlatFileItemReader and writes them via "flatFileDemoWriter".
 */
@Configuration
public class FlatFileDemoJobConfiguration {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    @Qualifier("flatFileDemoWriter")
    private ItemWriter<? super Customer> flatFileDemoWriter;

    /** Single-step job wrapping the CSV-reading step. */
    @Bean
    public Job flatFileDemoJob(){
        return jobBuilderFactory.get("flatFileDemoJob")
                .start(flatFileDemoStep())
                .build();
    }

    /** Chunk-oriented step: reads and writes Customer items 100 at a time. */
    @Bean
    public Step flatFileDemoStep() {
        return stepBuilderFactory.get("flatFileDemoStep")
                .<Customer,Customer>chunk(100)
                .reader(flatFileDemoReader())
                .writer(flatFileDemoWriter)
                .build();
    }

    /** Step-scoped reader for customer.csv; skips the header line. */
    @Bean
    @StepScope
    public FlatFileItemReader<Customer> flatFileDemoReader() {
        FlatFileItemReader<Customer> csvReader = new FlatFileItemReader<>();
        // Input file is located on the classpath.
        csvReader.setResource(new ClassPathResource("customer.csv"));
        // The first line is the column header; do not map it to an item.
        csvReader.setLinesToSkip(1);

        // Split each line on the default delimiter (comma) into named fields.
        DelimitedLineTokenizer fieldTokenizer = new DelimitedLineTokenizer();
        fieldTokenizer.setNames(new String[]{"id","firstName","lastName","birthdate"});

        // Map the tokenized fields onto a Customer instance.
        DefaultLineMapper<Customer> customerLineMapper = new DefaultLineMapper<>();
        customerLineMapper.setLineTokenizer(fieldTokenizer);
        customerLineMapper.setFieldSetMapper(fieldSet -> Customer.builder()
                .id(fieldSet.readLong("id"))
                .firstName(fieldSet.readString("firstName"))
                .lastName(fieldSet.readString("lastName"))
                .birthdate(fieldSet.readString("birthdate"))
                .build());
        customerLineMapper.afterPropertiesSet();

        csvReader.setLineMapper(customerLineMapper);
        return csvReader;
    }
}

<br/>
Output Method

```java
@Component("flatFileDemoWriter")
public class FlatFileDemoWriter implements ItemWriter<Customer> {
    @Override
    public void write(List<? extends Customer> items) throws Exception {
        for (Customer customer : items) {
            System.out.println(customer);
        }
    }
}
```

<br/>
Print as follows:

![file](https://graph.baidu.com/resource/222b0a0a173d9936b8ec601583252983.png)

<br/><br/><br/>


## File read/write: FlatFileItemReader and FlatFileItemWriter
<br/>


What do FlatFileItemReader and FlatFileItemWriter do for us?

1. They read and write files in fixed-size chunks (especially important for large files), so developers do not need to manage file read/write streams themselves

2. They provide transactional guarantees when reading and writing files

<br/>

###Detailed FlatFileItemReader
<br/>
FlatFileItemReader is a class for reading files, typically for processing tabular or text file data.The following two properties of this class must be set

* setResource specifies the location of the file resource: specify the file to read through ClassPathResource (the path of the class) or FileSystemResource (the path of the file system)

* setLineMapper row mapping: Specifies the mapping relationship between rows and entity objects, and the sample code uses DefaultLineMapper

* setEncoding sets the read encoding format; the default is 'iso-8859-1'

* setStrict strict mode, input file does not exist will throw an exception, blocking the current job; default is true

<br/>
Sample code:

/**
 * Sample reader bean for data/sample-data.csv: maps the "name" and "age"
 * columns onto Person via BeanWrapperFieldSetMapper.
 */
@Bean
public FlatFileItemReader<Person> csvItemReader() {
    FlatFileItemReader<Person> csvItemReader = new FlatFileItemReader<>();
    csvItemReader.setResource(new ClassPathResource("data/sample-data.csv"));

    // Plain setters instead of double-brace initialization: double-brace
    // creates anonymous subclasses that retain a reference to the enclosing
    // configuration instance.
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
    tokenizer.setNames(new String[]{"name", "age"});

    BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
    fieldSetMapper.setTargetType(Person.class);

    DefaultLineMapper<Person> lineMapper = new DefaultLineMapper<>();
    lineMapper.setLineTokenizer(tokenizer);
    lineMapper.setFieldSetMapper(fieldSetMapper);

    csvItemReader.setLineMapper(lineMapper);
    return csvItemReader;
}

<br/>

###Detailed FlatFileItemWriter
<br/>
FlatFileItemWriter is a write class that writes batch data streams to files, using methods that must be understood below:

- The setLineMapper methods of setLineAggregator and FlatFileItemReader are similar. The setLineAggregator method aggregates object attributes into strings, sets delimiters (setDelimiter) when aggregating, and the corresponding character names (setFieldExtractor) for object attributes.

 - The LineAggregator interface is to create an object attribute aggregation string

 - ExtractorLineAggregator is an abstract class that implements the LineAggregator interface.Converts object attributes to arrays using FieldExtractor, an extension class of which is responsible for converting arrays to strings (doAggregate)

   - DelimitedLineAggregator inherits ExtractorLineAggregator.Is a more common way of aggregating, splitting arrays with specified symbols, and using commas by default

   - FormatterLineAggregator inherits ExtractorLineAggregator.Maximum length, minimum length check, and formatting of array strings

 - PassThroughLineAggregator implements the LineAggregator interface and is a simple aggregation method that uses the return value of an object's.toString() as an aggregation string

- RecursiveCollectionLineAggregator implements the LineAggregator interface, traverses the Collection <T>collection, aggregates of collections are split by the system row divider, and aggregation of object fields is optional using the corresponding aggregation method of the LineAggregator interface.

- setResource is the location of the specified output file, which is also required, and the new ClassPathResource("/data/sample-data.txt") is used in the sample code more frequently in actual development than in new FilePathResource()

- setEncoding sets encoding, also iso-8859-1 by default

<br/>

![file](https://graph.baidu.com/resource/222784a7966e50e2edb1101583297793.png)

<br/>

Sample code:

/**
 * Sample writer bean: appends Person records to /data/sample-data.txt as
 * comma-delimited UTF-8 text.
 */
@Bean
public FlatFileItemWriter<Person> txtItemWriter() {
    FlatFileItemWriter<Person> txtItemWriter = new FlatFileItemWriter<>();
    txtItemWriter.setAppendAllowed(true);
    txtItemWriter.setEncoding("UTF-8");
    txtItemWriter.setResource(new ClassPathResource("/data/sample-data.txt"));

    // Plain setters instead of double-brace initialization (avoids anonymous
    // subclasses that capture the enclosing configuration instance).
    BeanWrapperFieldExtractor<Person> fieldExtractor = new BeanWrapperFieldExtractor<>();
    fieldExtractor.setNames(new String[]{"name", "age"});

    DelimitedLineAggregator<Person> lineAggregator = new DelimitedLineAggregator<>();
    lineAggregator.setDelimiter(",");
    lineAggregator.setFieldExtractor(fieldExtractor);

    txtItemWriter.setLineAggregator(lineAggregator);
    return txtItemWriter;
}

<br/>
<br/>
<br/>


# Read data from an XML file
<br/>

1. Use StaxEventItemReader<T>to read xml data 
2. Example: Add a customer.xml file to the project, taking reading this file as an example

**xml file to read**

![file](https://graph.baidu.com/resource/22266ba2119d2ce23921a01583296313.png)

<br/>


**pom.xml Configuration**

```xml
<dependency>
    <groupId>org.springframework</groupId>
    <artifactId>spring-oxm</artifactId>
</dependency>
<dependency>
    <groupId>com.thoughtworks.xstream</groupId>
    <artifactId>xstream</artifactId>
    <version>1.4.7</version>
</dependency>
```

<br/>

StaxEventItemReader

/**
 * Job configuration that reads Customer elements from customer.xml with
 * StaxEventItemReader, unmarshalling each <customer> fragment via XStream.
 */
@Configuration
public class XmlFileDemoJobConfiguration {
    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    @Qualifier("xmlFileDemoWriter")
    private ItemWriter<? super Customer> xmlFileDemoWriter;

    /** Single-step job wrapping the XML-reading step. */
    @Bean
    public Job xmlFileDemoJob(){
        return jobBuilderFactory.get("xmlFileDemoJob")
                .start(xmlFileDemoStep())
                .build();
    }

    /** Chunk-oriented step: reads and writes Customer items 10 at a time. */
    @Bean
    public Step xmlFileDemoStep() {
        return stepBuilderFactory.get("xmlFileDemoStep")
                .<Customer,Customer>chunk(10)
                .reader(xmlFileDemoReader())
                .writer(xmlFileDemoWriter)
                .build();
    }

    /**
     * Step-scoped reader that turns each <customer> XML fragment into a
     * Customer object.
     */
    @Bean
    @StepScope
    public StaxEventItemReader<Customer> xmlFileDemoReader() {
        StaxEventItemReader<Customer> reader = new StaxEventItemReader<>();

        reader.setResource(new ClassPathResource("customer.xml"));
        // Each <customer> element is treated as one item (fragment root).
        reader.setFragmentRootElementName("customer");

        // Alias map: XML element name -> target class.
        // Fixed: parameterized as Class<?> instead of the raw Class type.
        Map<String, Class<?>> aliases = new HashMap<>();
        aliases.put("customer", Customer.class);

        // XStream unmarshaller converts each fragment into a Customer.
        XStreamMarshaller unMarshaller = new XStreamMarshaller();
        unMarshaller.setAliases(aliases);
        reader.setUnmarshaller(unMarshaller);

        return reader;
    }
}

<br/> Output Method

```java
@Component("xmlFileDemoWriter")
public class XmlFileDemoWriter implements ItemWriter<Customer> {
    @Override
    public void write(List<? extends Customer> items) throws Exception {
        for (Customer customer : items) {
            System.out.println(customer);
        }
    }
}
```

<br/>
Print as follows:

![file](https://graph.baidu.com/resource/222ac917e105e5648fe1c01583297500.png)

<br/><br/><br/>


## XML file processing
<br/>
Processing XML files requires the introduction of the spring-oxm package, which only details the output of xml, similar to XML reading
 The object of the xml write operation is StaxEventItemWriter, similar to the use of FlatFileItemWriter, StaxEventItemWriter and FlatFileItemWriter both have setResource methods, and StaxEventItemWriter is coded utf-8 by default

* setRootTagName sets the root node label name

* setMarshaller specifies the object-node mapping relationship

<br/>
Sample code:

@Bean public StaxEventItemWriter<Person> xmlItemWriter() { StaxEventItemWriter<Person> xmlItemWriter = new StaxEventItemWriter<>(); xmlItemWriter.setRootTagName("root") xmlItemWriter.setEncoding("UTF-8"); xmlItemWriter.setResource(new ClassPathResource("/data/sample-data.xml")); xmlItemWriter.setMarshaller(new XStreamMarshaller() {{ Map<String, Class<Person>> map = new HashMap<>(); map.put("person",Person.class); setAliases(map); }}); return xmlItemWriter; }

<br/>
<br/>
<br/>


# Read data from multiple files
<br/>

1. It is very common to need to read multiple files from a given directory


2. We can use MultiResourceItemReader to register an input file and set up a proxy ItemReader to process each source file


Example: We store three csv files starting with files in the project classpath path at the same time, as follows:

![file](https://graph.baidu.com/resource/222cdef6af54e1131e13b01583300700.png)

<br/>
MultiResourceItemReader

/**
 * Job configuration that reads Customer records from every file*.csv on the
 * classpath by delegating a FlatFileItemReader to a MultiResourceItemReader.
 */
@Configuration
public class MultipleFileDemoJobConfiguration {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    @Qualifier("multiFileDeWriter")
    private ItemWriter<? super Customer> multiFileDeWriter;

    // All classpath resources matching file*.csv are read in turn.
    @Value("classpath*:/file*.csv")
    private Resource[] inputFiles;

    /** Single-step job wrapping the multi-file reading step. */
    @Bean
    public Job multipleFileDemoJob(){
        return jobBuilderFactory.get("multipleFileDemoJob")
                .start(multipleFileDemoStep())
                .build();
    }

    /** Chunk-oriented step: reads and writes Customer items 50 at a time. */
    @Bean
    public Step multipleFileDemoStep() {
        return stepBuilderFactory.get("multipleFileDemoStep")
                .<Customer,Customer>chunk(50)
                .reader(multipleResourceItemReader())
                .writer(multiFileDeWriter)
                .build();
    }

    /** Iterates over inputFiles, delegating per-file parsing to flatFileReader(). */
    private MultiResourceItemReader<Customer> multipleResourceItemReader() {
        MultiResourceItemReader<Customer> multiReader = new MultiResourceItemReader<>();
        multiReader.setDelegate(flatFileReader());
        multiReader.setResources(inputFiles);
        return multiReader;
    }

    /**
     * Delegate CSV reader. The resource set here is replaced per file by the
     * MultiResourceItemReader; no header lines are skipped.
     */
    @Bean
    public FlatFileItemReader<Customer> flatFileReader() {
        FlatFileItemReader<Customer> delegate = new FlatFileItemReader<>();
        delegate.setResource(new ClassPathResource("customer.csv"));

        // Split each comma-delimited line into named fields.
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
        tokenizer.setNames(new String[]{"id","firstName","lastName","birthdate"});

        // Map the tokenized fields onto a Customer instance.
        DefaultLineMapper<Customer> lineMapper = new DefaultLineMapper<>();
        lineMapper.setLineTokenizer(tokenizer);
        lineMapper.setFieldSetMapper(fieldSet -> Customer.builder()
                .id(fieldSet.readLong("id"))
                .firstName(fieldSet.readString("firstName"))
                .lastName(fieldSet.readString("lastName"))
                .birthdate(fieldSet.readString("birthdate"))
                .build());
        lineMapper.afterPropertiesSet();

        delegate.setLineMapper(lineMapper);
        return delegate;
    }
}


<br/>
Output Method

```java
@Component("multiFileDeWriter")
public class MultiFileDeWriter implements ItemWriter<Customer> {
    @Override
    public void write(List<? extends Customer> items) throws Exception {
        for (Customer customer : items) {
            System.out.println(customer);
        }
    }
}
```

<br/>
Print as follows:

![file](https://graph.baidu.com/resource/22239a27970444932658a01583300958.png)

<br/>

<br/>
Reference resources:

https://blog.csdn.net/wuzhiwei549/article/details/88592509

https://blog.51cto.com/13501268/2298081

https://www.jianshu.com/p/9b7088471371

Tags: Programming xml Database Spring encoding

Posted on Mon, 09 Mar 2020 12:59:45 -0400 by dbrown